def pred_abs_error(y_true, y_pred):
    """Mean absolute gap between the arg-max prediction and the summed target.

    The prediction is reduced to the index of its highest score along the
    last axis; the target is reduced by summing along its last axis. Both are
    cast to the dtype of the incoming ``y_pred`` before being compared.
    """
    out_dtype = y_pred.dtype
    predicted_index = K.cast(K.argmax(y_pred, axis=-1), out_dtype)
    target_total = K.cast(K.sum(y_true, axis=-1), out_dtype)
    deviation = K.abs(predicted_index - target_total)
    return K.mean(deviation)
def pixel_accuracy(y_true, y_pred):
    """Fraction of elements whose arg-max class equals the label in y_true.

    ``y_pred`` holds per-class scores on its last axis; ``y_true`` holds the
    labels themselves. Both are flattened before the comparison.
    """
    # Highest-scoring class per element, expressed in the label dtype.
    predicted_labels = K.cast(K.argmax(y_pred, axis=-1), y_true.dtype)
    flat_truth = K.flatten(y_true)
    flat_guess = K.flatten(predicted_labels)
    matches = K.equal(flat_truth, flat_guess)
    return K.mean(matches)
def mean_squareroot_error(y_true, y_pred):
    """Mean over the last axis of sqrt(|y_pred - y_true| + 1e-8).

    A non-tensor ``y_pred`` is wrapped in a constant first, and ``y_true`` is
    cast to the prediction dtype.
    """
    prediction = y_pred if K.is_tensor(y_pred) else K.constant(y_pred)
    target = K.cast(y_true, prediction.dtype)
    # The small offset keeps the argument of sqrt strictly positive.
    rooted = K.sqrt(K.abs(prediction - target) + 1e-8)
    return K.mean(rooted, axis=-1)
def accuracy(y_true, y_pred):
    '''Accuracy after binarising y_pred with a fixed 0.5 threshold.

    Values of y_pred above 0.5 become 1 (in y_true's dtype), the rest 0; the
    result is the mean of the element-wise equality with y_true.
    '''
    above_threshold = y_pred > 0.5
    hard_prediction = K.cast(above_threshold, y_true.dtype)
    agreement = K.equal(y_true, hard_prediction)
    return K.mean(agreement)
def yolo3_loss(args, anchors, num_classes, ignore_thresh=.5, label_smoothing=0, elim_grid_sense=False, use_focal_loss=False, use_focal_obj_loss=False, use_softmax_loss=False, use_giou_loss=False, use_diou_loss=True):
    '''
    YOLOv3 loss function.

    Parameters
    ----------
    args: sequence; the first ``num_layers`` entries are used as the network
        outputs (one per detection scale) and the remaining entries as the
        matching ground-truth tensors (``y_true``).
    anchors: indexable by a list of ints (``anchors[anchor_mask[l]]``);
        ``len(anchors) // 3`` is taken as the number of detection scales.
        Presumably an array of shape (N, 2) holding widths/heights -- confirm
        against the caller.
    num_classes: integer, forwarded to ``yolo3_decode``.
    ignore_thresh: float, IoU threshold; a prediction whose best IoU with any
        ground-truth box is >= this value gets no no-object confidence penalty.
    label_smoothing: if truthy, class and objectness targets are passed
        through ``_smooth_labels`` with this value.
    elim_grid_sense: if True, per-scale ``scale_x_y`` factors are forwarded
        to ``yolo3_decode`` instead of ``None``.
    use_focal_loss: use focal loss for the class term.
    use_focal_obj_loss: use focal loss for the objectness term. Note that
        this branch does not apply ``ignore_mask``.
    use_softmax_loss: softmax-style instead of sigmoid-style class output.
    use_giou_loss: use GIoU as the location loss (checked first).
    use_diou_loss: use DIoU as the location loss (checked only when
        ``use_giou_loss`` is False). Defaults to True, so the plain
        xy/wh location loss is used only when both flags are False.

    Returns
    -------
    (loss, total_location_loss, total_confidence_loss, total_class_loss)
        ``loss`` is the sum over all scales with a trailing axis added by
        ``K.expand_dims``; the other three are the per-term sums over scales.
    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]

    # Which anchors belong to which output scale, plus the optional
    # grid-sensitivity scale factor for each of them.
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
        scale_x_y = [1.05, 1.1, 1.2] if elim_grid_sense else [None, None, None]
    else:
        anchor_mask = [[3, 4, 5], [0, 1, 2]]
        scale_x_y = [1.05, 1.05] if elim_grid_sense else [None, None]

    # The input size is derived from the first output's spatial dims times 32.
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    total_location_loss = 0
    total_confidence_loss = 0
    total_class_loss = 0
    batch_size = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    batch_size_f = K.cast(batch_size, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        # Channel 4 of y_true is the objectness flag, channels 5+ the class targets.
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]
        if label_smoothing:
            true_class_probs = _smooth_labels(true_class_probs, label_smoothing)
            true_objectness_probs = _smooth_labels(object_mask, label_smoothing)
        else:
            true_objectness_probs = object_mask

        grid, raw_pred, pred_xy, pred_wh = yolo3_decode(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape,
            scale_x_y=scale_x_y[l], calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        # (raw_true_xy / raw_true_wh are only consumed by the plain xy/wh
        # location loss in the final else-branch below.)
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        # Weight in (1, 2]: boxes with a smaller w*h product get a larger weight.
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # Ground-truth boxes of sample b that actually contain an object.
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            # 1 where the prediction overlaps no ground truth well enough.
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = tf.while_loop(lambda b, *args: b < batch_size, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        if use_focal_obj_loss:
            # Focal loss for objectness confidence
            confidence_loss = sigmoid_focal_loss(true_objectness_probs, raw_pred[..., 4:5])
        else:
            # Object cells use the (possibly smoothed) target; no-object cells
            # are penalised only where ignore_mask is 1.
            confidence_loss = object_mask * K.binary_crossentropy(true_objectness_probs, raw_pred[...,4:5], from_logits=True)+ \
                (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask

        if use_focal_loss:
            # Focal loss for classification score
            if use_softmax_loss:
                class_loss = softmax_focal_loss(true_class_probs, raw_pred[..., 5:])
            else:
                class_loss = sigmoid_focal_loss(true_class_probs, raw_pred[..., 5:])
        else:
            if use_softmax_loss:
                # use softmax style classification output
                class_loss = object_mask * K.expand_dims(
                    K.categorical_crossentropy(
                        true_class_probs, raw_pred[..., 5:], from_logits=True),
                    axis=-1)
            else:
                # use sigmoid style classification output
                class_loss = object_mask * K.binary_crossentropy(
                    true_class_probs, raw_pred[..., 5:], from_logits=True)

        if use_giou_loss:
            # Calculate GIoU loss as location loss
            raw_true_box = y_true[l][..., 0:4]
            giou = box_giou(raw_true_box, pred_box)
            giou_loss = object_mask * box_loss_scale * (1 - giou)
            giou_loss = K.sum(giou_loss) / batch_size_f
            location_loss = giou_loss
        elif use_diou_loss:
            # Calculate DIoU loss as location loss
            raw_true_box = y_true[l][..., 0:4]
            diou = box_diou(raw_true_box, pred_box)
            diou_loss = object_mask * box_loss_scale * (1 - diou)
            diou_loss = K.sum(diou_loss) / batch_size_f
            location_loss = diou_loss
        else:
            # Standard YOLOv3 location loss
            # K.binary_crossentropy is helpful to avoid exp overflow.
            xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
                raw_true_xy, raw_pred[..., 0:2], from_logits=True)
            wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
                raw_true_wh - raw_pred[..., 2:4])
            xy_loss = K.sum(xy_loss) / batch_size_f
            wh_loss = K.sum(wh_loss) / batch_size_f
            location_loss = xy_loss + wh_loss

        # Every term is summed over all elements and divided by the batch size.
        confidence_loss = K.sum(confidence_loss) / batch_size_f
        class_loss = K.sum(class_loss) / batch_size_f
        loss += location_loss + confidence_loss + class_loss
        total_location_loss += location_loss
        total_confidence_loss += confidence_loss
        total_class_loss += class_loss

    # Fit for tf 2.0.0 loss shape
    loss = K.expand_dims(loss, axis=-1)

    return loss, total_location_loss, total_confidence_loss, total_class_loss
def accuracy(y_true, y_pred):
    """Share of entries where y_pred is within ``margin`` of y_true @ y_true^T.

    ``margin`` is read from the enclosing scope.
    """
    pairwise_target = tf.matmul(y_true, y_true, transpose_b=True)
    close_enough = K.abs(pairwise_target - y_pred) < margin
    return K.mean(K.cast(close_enough, 'float32'))
def get_updates(self, loss, params):
    """Build the update ops for momentum SGD with a Gaussian-shaped LR schedule.

    For every parameter this computes a (optionally Nesterov) momentum step,
    clips the step magnitude to ``mean(|p|) * self.UPCLIP``, applies the
    parameter's constraint and an optional per-pattern value clip, and
    appends the resulting ``K.update`` ops to ``self.updates``.

    Per-parameter learning rates, momenta and clips are looked up by matching
    ``p.name`` against the keys of ``self.lr`` / ``self.min_lr`` /
    ``self.peak_lr`` / ``self.momentum`` / ``self.clips`` through
    ``set_pattern_find``; the ``'all'`` entry is the fallback.

    Returns ``self.updates``. Also sets ``self.weights``.
    """
    grads = self.get_gradients(loss, params)
    # first update the number of iterations
    self.updates = [K.update_add(self.iterations, 1)]

    # Cycling Gaussian LR
    # Original formulation: lr_f = lambda x,b,c,s: b+ s*np.exp(-(x-c)**2/(c*0.5)**2)
    # Here the width factor is `lrsigma` instead of the fixed 0.5.
    def gauss_lr(min_lr, max_lr, center, lrsigma, i):
        return (min_lr + max_lr * K.exp(-(i - center)**2 / (center * lrsigma)**2))

    ite_casted = K.cast(self.iterations, K.dtype(self.peaklriter))
    all_lr = gauss_lr(self.min_lr['all'], self.peak_lr['all'], self.peaklriter, self.lrsigma, ite_casted)
    # Store the scheduled global rate back into the 'all' LR variable.
    self.updates.append(K.update(self.lr['all'], all_lr))

    shapes = [K.int_shape(p) for p in params]
    moments = [K.zeros(s) for s in shapes]
    self.weights = [self.iterations] + moments
    for p, g, m in zip(params, grads, moments):
        # --- learning rate for this parameter ---
        lrptrkey = set_pattern_find(p.name, self.lr.keys())
        if lrptrkey:
            if self.verbose > 0:
                print("Setting different learning rate for ", p.name, " : ", K.eval(self.lr[lrptrkey]))
            # The parameter-specific schedule is used only when the name also
            # matches keys in both min_lr and peak_lr; the values are then
            # read with the key found in self.lr.
            if set_pattern_find(p.name, self.min_lr.keys()) and set_pattern_find(p.name, self.peak_lr.keys()):
                p_lr = gauss_lr(self.min_lr[lrptrkey], self.peak_lr[lrptrkey], self.peaklriter, self.lrsigma, ite_casted)
            else:
                p_lr = gauss_lr(self.min_lr['all'], self.peak_lr['all'], self.peaklriter, self.lrsigma, ite_casted)
        else:
            p_lr = self.lr['all']

        # --- momentum for this parameter ---
        momptrkey = set_pattern_find(p.name, self.momentum.keys())
        if momptrkey:
            if self.verbose > 0:
                print("Setting different momentum for ", p.name, " , ", K.eval(self.momentum[momptrkey]))
            momentum = self.momentum[momptrkey]
        else:
            momentum = self.momentum['all']

        if self.nesterov:
            updt = momentum * (momentum * m - p_lr * g) - p_lr * g
        else:
            updt = momentum * m - p_lr * g

        # CHANGE CLIP
        # Clip the magnitude of the step to [0, mean(|p|) * UPCLIP] and
        # restore its sign afterwards.
        _to_tensor = K.tensorflow_backend._to_tensor  # not used below
        _clip_by_val = K.tf.clip_by_value
        margin = K.mean(K.abs(p)) * K.constant(self.UPCLIP)
        min_v = K.zeros_like(margin)
        updt_sign = K.sign(updt)
        updt_val = _clip_by_val(K.abs(updt), min_v, margin)

        v = updt_sign * updt_val  # velocity
        new_p = p + v

        # The clipped step is what gets stored as the new moment.
        self.updates.append(K.update(m, v))
        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        # Optional hard clip of the parameter value itself.
        clptrkey = set_pattern_find(p.name, self.clips.keys())
        if self.clips_val and clptrkey:
            c = K.eval(self.clips[clptrkey])
            if self.verbose > 0:
                print("Clipping variable", p.name, " to ", c)
            new_p = K.clip(new_p, c[0], c[1])
        self.updates.append(K.update(p, new_p))
    return self.updates
def _process_sample(args):
    """Decode one sample's heatmaps into a (k, ...) detection tensor.

    ``args`` unpacks into the object heatmap, centre offsets, box sizes,
    keypoint regressions, keypoint heatmaps and keypoint offsets.
    ``k``, ``cat``, ``width`` and ``output_stride`` come from the enclosing
    scope. Each returned row is the concatenation
    ``[x1, y1, x2, y2, score, keypoints..., class]``.

    NOTE(review): the inputs are indexed as flat tensors (``top_k`` on
    ``_hm``, ``gather`` with flat indices) -- presumably the caller has
    already flattened the spatial dimensions; confirm.
    """
    _hm, _reg, _wh, _kps, _hm_hp, _hp_offset = args
    # Top-k peaks of the object heatmap; the flat index encodes
    # (position * cat + class).
    _scores, _inds = tf.math.top_k(_hm, k=k, sorted=True)
    _classes = K.cast(_inds % cat, 'float32')
    _inds = K.cast(_inds / cat, 'int32')
    _xs = K.cast(_inds % width, 'float32')
    _ys = K.cast(K.cast(_inds / width, 'int32'), 'float32')
    _wh = K.gather(_wh, _inds)
    _reg = K.gather(_reg, _inds)
    _kps = K.gather(_kps, _inds)

    # shift keypoints by their center
    # (even columns are treated as x, odd columns as y)
    _kps_x = _kps[:, ::2]
    _kps_y = _kps[:, 1::2]
    _kps_x = _kps_x + K.expand_dims(_xs, -1)  # k x J
    _kps_y = _kps_y + K.expand_dims(_ys, -1)  # k x J
    _kps = K.stack([_kps_x, _kps_y], -1)  # k x J x 2

    # Refine the centre with the regressed offset, then build box corners.
    _xs = _xs + _reg[..., 0]
    _ys = _ys + _reg[..., 1]

    _x1 = _xs - _wh[..., 0] / 2
    _y1 = _ys - _wh[..., 1] / 2
    _x2 = _xs + _wh[..., 0] / 2
    _y2 = _ys + _wh[..., 1] / 2

    # snap center keypoints to the closest heatmap keypoint
    def _process_channel(args):
        # Runs once per keypoint channel (see K.map_fn below).
        __kps, __hm_hp = args
        thresh = 0.1
        __hm_scores, __hm_inds = tf.math.top_k(__hm_hp, k=k, sorted=True)
        __hm_xs = K.cast(__hm_inds % width, 'float32')
        __hm_ys = K.cast(K.cast(__hm_inds / width, 'int32'), 'float32')
        __hp_offset = K.gather(_hp_offset, __hm_inds)
        __hm_xs = __hm_xs + __hp_offset[..., 0]
        __hm_ys = __hm_ys + __hp_offset[..., 1]
        # Peaks at or below the threshold get score -1 and a far-away position.
        mask = K.cast(__hm_scores > thresh, 'float32')
        __hm_scores = (1. - mask) * -1. + mask * __hm_scores
        __hm_xs = (1. - mask) * -10000. + mask * __hm_xs
        __hm_ys = (1. - mask) * -10000. + mask * __hm_ys
        __hm_kps = K.stack([__hm_xs, __hm_ys], -1)  # k x 2
        __broadcast_hm_kps = K.expand_dims(__hm_kps, 1)  # k x 1 x 2
        __broadcast_kps = K.expand_dims(__kps, 0)  # 1 x k x 2
        dist = K.sqrt(
            K.sum(K.pow(__broadcast_kps - __broadcast_hm_kps, 2), 2))  # k, k
        # For every regressed keypoint: the nearest heatmap peak and its distance.
        min_dist = K.min(dist, 0)
        min_ind = K.argmin(dist, 0)
        __hm_scores = K.gather(__hm_scores, min_ind)
        __hm_kps = K.gather(__hm_kps, min_ind)
        # Count the reasons to reject the heatmap peak: outside the box,
        # low score, or further away than 0.3 * max(box w, box h).
        mask = (K.cast(__hm_kps[..., 0] < _x1, 'float32') +
                K.cast(__hm_kps[..., 0] > _x2, 'float32') +
                K.cast(__hm_kps[..., 1] < _y1, 'float32') +
                K.cast(__hm_kps[..., 1] > _y2, 'float32') +
                K.cast(__hm_scores < thresh, 'float32') + K.cast(
                    min_dist > 0.3 * (K.maximum(_wh[..., 0], _wh[..., 1])),
                    'float32'))
        mask = K.expand_dims(mask, -1)
        mask = K.cast(mask > 0, 'float32')
        # mask == 1 keeps the regressed keypoint, mask == 0 takes the peak.
        __kps = (1. - mask) * __hm_kps + mask * __kps
        return __kps

    _kps = K.permute_dimensions(_kps, (1, 0, 2))  # J x k x 2
    _hm_hp = K.permute_dimensions(_hm_hp, (1, 0))  # J x -1
    _kps = K.map_fn(_process_channel, [_kps, _hm_hp], dtype='float32')
    # NOTE(review): permuting to (1, 2, 0) puts the coordinate axis before the
    # joint axis, so each flattened row appears to hold all x values followed
    # by all y values rather than interleaved pairs -- confirm with consumers.
    _kps = K.reshape(K.permute_dimensions(_kps, (1, 2, 0)), (k, -1))  # k x J * 2

    # rescale to image coordinates
    _x1 = output_stride * _x1
    _y1 = output_stride * _y1
    _x2 = output_stride * _x2
    _y2 = output_stride * _y2
    _kps = output_stride * _kps

    _boxes = K.stack([_x1, _y1, _x2, _y2], -1)
    _scores = K.expand_dims(_scores, -1)
    _classes = K.expand_dims(_classes, -1)
    _detection = K.concatenate([_boxes, _scores, _kps, _classes], -1)
    return _detection
def custom_mse(y_true, y_pred):
    """Mean squared error over axis 2 with missing targets zeroed out.

    Entries where ``y_true`` equals ``missing_value`` (from the enclosing
    scope) contribute an error of zero; they still count in the mean's
    denominator.
    """
    is_present = tfk.not_equal(y_true, missing_value)
    weight = tfk.cast(is_present, tfk.floatx())
    masked_residual = (y_pred - y_true) * weight
    return tfk.mean(tfk.square(masked_residual), axis=2)
def _process_channel(args):
    """Replace regressed keypoints of one channel by nearby heatmap peaks.

    ``args`` is ``(regressed keypoints, keypoint heatmap)`` for a single
    channel. ``k``, ``width``, ``_hp_offset``, ``_x1``/``_y1``/``_x2``/``_y2``
    and ``_wh`` are read from the enclosing scope. A regressed keypoint is
    kept whenever its nearest peak lies outside the box, scores below the
    threshold, or is further away than 0.3 * max(box width, box height).
    """
    regressed_kps, channel_heatmap = args
    score_floor = 0.1

    # Strongest k peaks of this channel and their refined coordinates.
    peak_scores, peak_inds = tf.math.top_k(channel_heatmap, k=k, sorted=True)
    peak_cols = K.cast(peak_inds % width, 'float32')
    peak_rows = K.cast(K.cast(peak_inds / width, 'int32'), 'float32')
    peak_shift = K.gather(_hp_offset, peak_inds)
    peak_cols = peak_cols + peak_shift[..., 0]
    peak_rows = peak_rows + peak_shift[..., 1]

    # Weak peaks get score -1 and are pushed far away from every keypoint.
    confident = K.cast(peak_scores > score_floor, 'float32')
    peak_scores = (1. - confident) * -1. + confident * peak_scores
    peak_cols = (1. - confident) * -10000. + confident * peak_cols
    peak_rows = (1. - confident) * -10000. + confident * peak_rows
    peak_xy = K.stack([peak_cols, peak_rows], -1)  # k x 2

    # Pairwise distances between peaks (axis 0) and regressed keypoints (axis 1).
    delta = K.expand_dims(regressed_kps, 0) - K.expand_dims(peak_xy, 1)
    dist = K.sqrt(K.sum(K.pow(delta, 2), 2))  # k, k
    nearest_dist = K.min(dist, 0)
    nearest_peak = K.argmin(dist, 0)
    peak_scores = K.gather(peak_scores, nearest_peak)
    peak_xy = K.gather(peak_xy, nearest_peak)

    # Any single violated condition is enough to keep the regressed keypoint.
    violations = (
        peak_xy[..., 0] < _x1,
        peak_xy[..., 0] > _x2,
        peak_xy[..., 1] < _y1,
        peak_xy[..., 1] > _y2,
        peak_scores < score_floor,
        nearest_dist > 0.3 * (K.maximum(_wh[..., 0], _wh[..., 1])),
    )
    violation_count = K.cast(violations[0], 'float32')
    for condition in violations[1:]:
        violation_count = violation_count + K.cast(condition, 'float32')
    keep_regressed = K.expand_dims(violation_count, -1)
    keep_regressed = K.cast(keep_regressed > 0, 'float32')

    return (1. - keep_regressed) * peak_xy + keep_regressed * regressed_kps
def _nms(heat, kernel=3):
    """Zero every heatmap value that is not the maximum of its neighbourhood.

    A 'same'-padded ``kernel`` x ``kernel`` max-pool is compared with the
    input; only positions equal to their pooled value keep their score.
    """
    window = (kernel, kernel)
    local_max = K.pool2d(heat, window, padding='same', pool_mode='max')
    is_peak = K.equal(local_max, heat)
    return heat * K.cast(is_peak, K.floatx())
def iou(y_true, y_pred, thres=0.5, label=1):
    """Intersection over union after thresholding y_pred at ``thres``.

    Sums are taken over all elements of the tensors. ``label`` is accepted
    but not used by the computation.
    """
    binarised = K.cast(K.greater(y_pred, thres), dtype='float32')
    overlap = K.sum(y_true * binarised)
    combined = K.sum(y_true) + K.sum(binarised) - overlap
    return overlap / combined
def ord_pred_abs_error(y_true, y_pred, threshold=0.0):
    """Mean absolute difference between predicted and true counts.

    The predicted count is the number of ``y_pred`` entries above
    ``threshold`` along the last axis; the true count is the sum of
    ``y_true`` along the last axis.
    """
    fired = K.cast(y_pred > threshold, y_pred.dtype)
    predicted_count = K.sum(fired, axis=-1)
    true_count = K.sum(y_true, axis=-1)
    return K.mean(K.abs(predicted_count - true_count))
def ord_pred_accuracy(y_true, y_pred, threshold=0.0):
    """Fraction of samples whose predicted count equals the true count.

    The predicted count is the number of ``y_pred`` entries above
    ``threshold`` along the last axis; the true count is the sum of
    ``y_true`` along the last axis.
    """
    fired = K.cast(y_pred > threshold, y_pred.dtype)
    predicted_count = K.sum(fired, axis=-1)
    true_count = K.sum(y_true, axis=-1)
    return K.mean(K.equal(true_count, predicted_count))
def yolo2_loss(args, anchors, num_classes, label_smoothing=0, elim_grid_sense=False, use_crossentropy_loss=False, use_crossentropy_obj_loss=False, rescore_confidence=False, use_giou_loss=False, use_diou_loss=False):
    """
    YOLOv2 loss function.

    Parameters
    ----------
    args : pair ``(yolo_output, y_true)``
        ``yolo_output`` is the network output fed to ``yolo2_decode``.
        ``y_true`` is the ground truth; channels 0:4 are read as the box,
        channel 4 as the object flag and channel 5 as the integer class id.
    anchors : anchor boxes, forwarded to ``yolo2_decode`` and used to
        normalise the true width/height in the default location loss.
    num_classes : int
        Number of object classes. With exactly one class the classification
        term is replaced by a constant 0.
    label_smoothing : if truthy, one-hot class targets are smoothed with
        ``_smooth_labels``.
    elim_grid_sense : bool
        If True, ``scale_x_y=1.05`` is passed to ``yolo2_decode``.
    use_crossentropy_loss : bool
        Categorical cross-entropy instead of squared error for the class term.
    use_crossentropy_obj_loss : bool
        Binary cross-entropy instead of squared error for the confidence term.
    rescore_confidence : bool, default=False
        If true then the confidence target for object cells is the best IoU
        instead of 1.
    use_giou_loss, use_diou_loss : bool
        Select GIoU (checked first) or DIoU as the location loss; otherwise
        the squared-error loss on Darknet-style transformed boxes is used.

    Returns
    -------
    (total_loss, location_loss_sum, confidence_loss_sum, classification_loss_sum)
        ``total_loss`` is 0.5 * the sum of the three terms with a trailing
        axis added; each term is summed over all elements and divided by the
        batch size.
    """
    (yolo_output, y_true) = args
    num_anchors = len(anchors)  # not used below
    scale_x_y = 1.05 if elim_grid_sense else None
    yolo_output_shape = K.shape(yolo_output)
    # The input size is derived from the output's spatial dims times 32.
    input_shape = K.cast(yolo_output_shape[1:3] * 32, K.dtype(y_true))
    grid_shape = K.cast(yolo_output_shape[1:3], K.dtype(y_true))  # height, width
    batch_size_f = K.cast(yolo_output_shape[0], K.dtype(yolo_output))  # batch size, float tensor
    # Fixed weights of the individual loss terms.
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    location_scale = 1

    grid, raw_pred, pred_xy, pred_wh = yolo2_decode(yolo_output, anchors, num_classes, input_shape, scale_x_y=scale_x_y, calc_loss=True)
    pred_confidence = K.sigmoid(raw_pred[..., 4:5])
    pred_class_prob = K.softmax(raw_pred[..., 5:])

    object_mask = y_true[..., 4:5]

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_boxes = K.concatenate([pred_xy, pred_wh])
    pred_boxes = K.expand_dims(pred_boxes, 4)

    raw_true_boxes = y_true[..., 0:4]
    raw_true_boxes = K.expand_dims(raw_true_boxes, 4)

    iou_scores = box_iou(pred_boxes, raw_true_boxes)
    # NOTE(review): squeezing axis 0 only works if box_iou returns a leading
    # axis of size 1 -- box_iou is not visible here; confirm.
    iou_scores = K.squeeze(iou_scores, axis=0)

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLOv2 does not use binary cross-entropy. Here we try it.
    # No-object penalty applies only where there is neither a ground-truth
    # object nor a prediction with IoU above 0.6.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - object_mask))
    if use_crossentropy_obj_loss:
        no_objects_loss = no_object_weights * K.binary_crossentropy(
            K.zeros(K.shape(pred_confidence)), pred_confidence, from_logits=False)

        if rescore_confidence:
            objects_loss = (object_scale * object_mask * K.binary_crossentropy(
                best_ious, pred_confidence, from_logits=False))
        else:
            objects_loss = (
                object_scale * object_mask *
                K.binary_crossentropy(K.ones(K.shape(pred_confidence)), pred_confidence, from_logits=False))
    else:
        no_objects_loss = no_object_weights * K.square(-pred_confidence)

        if rescore_confidence:
            objects_loss = (object_scale * object_mask *
                            K.square(best_ious - pred_confidence))
        else:
            objects_loss = (object_scale * object_mask *
                            K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLOv2 does not use categorical cross-entropy loss.
    #       Here we try it.
    matching_classes = K.cast(y_true[..., 5], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)

    if label_smoothing:
        matching_classes = _smooth_labels(matching_classes, label_smoothing)

    if use_crossentropy_loss:
        classification_loss = (
            class_scale * object_mask * K.expand_dims(K.categorical_crossentropy(
                matching_classes, pred_class_prob, from_logits=False), axis=-1))
    else:
        classification_loss = (class_scale * object_mask *
                               K.square(matching_classes - pred_class_prob))

    if use_giou_loss:
        # Calculate GIoU loss as location loss
        giou = box_giou(raw_true_boxes, pred_boxes)
        giou = K.squeeze(giou, axis=-1)
        giou_loss = location_scale * object_mask * (1 - giou)
        location_loss = giou_loss
    elif use_diou_loss:
        # Calculate DIoU loss as location loss
        diou = box_diou(raw_true_boxes, pred_boxes)
        diou = K.squeeze(diou, axis=-1)
        diou_loss = location_scale * object_mask * (1 - diou)
        location_loss = diou_loss
    else:
        # YOLOv2 location loss for matching detection boxes.
        # Darknet trans box to calculate loss.
        trans_true_xy = y_true[..., :2] * grid_shape[::-1] - grid
        trans_true_wh = K.log(y_true[..., 2:4] / anchors * input_shape[::-1])
        trans_true_wh = K.switch(
            object_mask, trans_true_wh,
            K.zeros_like(trans_true_wh))  # avoid log(0)=-inf
        trans_true_boxes = K.concatenate([trans_true_xy, trans_true_wh])

        # Unadjusted box predictions for loss.
        trans_pred_boxes = K.concatenate(
            (K.sigmoid(raw_pred[..., 0:2]), raw_pred[..., 2:4]), axis=-1)

        location_loss = (location_scale * object_mask *
                         K.square(trans_true_boxes - trans_pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss) / batch_size_f
    location_loss_sum = K.sum(location_loss) / batch_size_f
    # only involve class loss for multiple classes
    if num_classes == 1:
        classification_loss_sum = K.constant(0)
    else:
        classification_loss_sum = K.sum(classification_loss) / batch_size_f
    total_loss = 0.5 * (confidence_loss_sum + classification_loss_sum +
                        location_loss_sum)

    # Fit for tf 2.0.0 loss shape
    total_loss = K.expand_dims(total_loss, axis=-1)

    return total_loss, location_loss_sum, confidence_loss_sum, classification_loss_sum
def loop_body(b, ignore_mask):
    """One ``while_loop`` step: write the ignore flags of sample ``b``.

    Reads ``y_true``, ``l``, ``object_mask_bool``, ``pred_box`` and
    ``ignore_thresh`` from the enclosing scope. A flag is 1 where the best
    IoU between a predicted box and any ground-truth box of the sample is
    below ``ignore_thresh``. Returns the next index and the updated array.
    """
    sample_boxes = y_true[l][b, ..., 0:4]
    sample_has_object = object_mask_bool[b, ..., 0]
    # Keep only the ground-truth boxes that are marked as objects.
    true_box = tf.boolean_mask(sample_boxes, sample_has_object)
    overlaps = box_iou(pred_box[b], true_box)
    top_overlap = K.max(overlaps, axis=-1)
    flags = K.cast(top_overlap < ignore_thresh, K.dtype(true_box))
    return b + 1, ignore_mask.write(b, flags)
def binary_crossentropy(y_true, y_pred):
    """Binary cross-entropy against y_true @ y_true^T, zeroed inside the margin.

    Entries whose absolute error does not exceed ``margin`` (from the
    enclosing scope) contribute no loss.
    """
    pairwise_target = tf.matmul(y_true, y_true, transpose_b=True)
    outside_margin = K.abs(pairwise_target - y_pred) > margin
    weight = (K.cast(outside_margin, 'float32'))
    return weight * K.binary_crossentropy(pairwise_target, y_pred)
def sparse_gather(y_pred, target_indices, task_name):
    """Flatten y_pred to 2-D and pick the rows listed in target_indices.

    Two ``Lambda`` layers are created, named ``<task_name>_flatten`` and
    ``<task_name>_gather``. The first reshapes ``y_pred`` to
    ``(-1, last_dim)``; the second gathers rows using the indices cast to
    int32.
    """
    flatten_layer = Lambda(
        lambda t: K.reshape(t, (-1, K.int_shape(t)[-1])),
        name=task_name + '_flatten')
    flat_pred = flatten_layer(y_pred)

    gather_layer = Lambda(
        lambda pair: K.gather(pair[0], K.cast(pair[1], 'int32')),
        name=task_name + '_gather')
    return gather_layer([flat_pred, target_indices])
def get_updates(self, loss, params):
    """Build the update ops for momentum SGD with step decay and update clipping.

    When ``self.decay_epochs`` is truthy, a learning rate is multiplied by
    its decay factor on every iteration whose count equals one of the values
    in ``self.decay_epochs``. For every parameter a (optionally Nesterov)
    momentum step is computed, the new value is clipped to stay within
    ``mean(|p * UPCLIP|)`` of the old value, the parameter's constraint and
    an optional per-pattern value clip are applied, and the ``K.update`` ops
    are appended to ``self.updates``.

    Per-parameter settings are looked up by matching ``p.name`` against the
    keys of ``self.lr`` / ``self.decay`` / ``self.momentum`` / ``self.clips``
    through ``set_pattern_find``; the ``'all'`` entry is the fallback.

    Returns ``self.updates``. Also sets ``self.weights``.
    """
    grads = self.get_gradients(loss, params)
    # first update the number of iterations
    self.updates = [K.update_add(self.iterations, 1)]

    if self.decay_epochs:
        ite_casted = K.cast(self.iterations, K.dtype(self.decay_epochs))
        # True when the current iteration count equals any decay point.
        hit_decay_epoch = K.any(K.equal(ite_casted, self.decay_epochs))
        lr = K.switch(hit_decay_epoch, self.lr['all'] * self.decay['all'], self.lr['all'])
        self.updates.append(K.update(self.lr['all'], lr))

    shapes = [K.int_shape(p) for p in params]
    moments = [K.zeros(s) for s in shapes]
    self.weights = [self.iterations] + moments
    for p, g, m in zip(params, grads, moments):
        # --- learning rate (and its decay) for this parameter ---
        lrptrkey = set_pattern_find(p.name, self.lr.keys())
        if lrptrkey:
            if self.verbose > 0:
                print("Setting different learning rate for ", p.name, " : ", K.eval(self.lr[lrptrkey]))
            lr = self.lr[lrptrkey]
            dcptrkey = set_pattern_find(p.name, self.decay.keys())
            # NOTE(review): an update for self.lr[lrptrkey] is appended once
            # per matching parameter, so several parameters sharing one key
            # each add their own update op -- confirm this is intended.
            if self.decay_epochs and dcptrkey:
                lr = K.switch(hit_decay_epoch, self.lr[lrptrkey] * self.decay[dcptrkey], self.lr[lrptrkey])
                self.updates.append(K.update(self.lr[lrptrkey], lr))
                if self.verbose > 0:
                    print("Added decay to ", p.name, ": ", K.eval(lr), ",", self.decay[dcptrkey])
            elif self.decay_epochs:
                # No parameter-specific decay: fall back to the global factor.
                lr = K.switch(hit_decay_epoch, self.lr[lrptrkey] * self.decay['all'], self.lr[lrptrkey])
                self.updates.append(K.update(self.lr[lrptrkey], lr))
                if self.verbose > 0:
                    print("Added decay to ", p.name, ": ", K.eval(lr), ",", self.decay['all'])
            else:
                lr = self.lr[lrptrkey]
        else:
            lr = self.lr['all']

        # --- momentum for this parameter ---
        momptrkey = set_pattern_find(p.name, self.momentum.keys())
        if momptrkey:
            if self.verbose > 0:
                print("Setting different momentum for ", p.name, " , ", K.eval(self.momentum[momptrkey]))
            momentum = self.momentum[momptrkey]
        else:
            momentum = self.momentum['all']

        v = momentum * m - lr * g  # velocity
        self.updates.append(K.update(m, v))

        if self.nesterov:
            new_p = p + momentum * (momentum * m - lr * g) - lr * g
        else:
            new_p = p + momentum * m - lr * g

        # CHANGE CLIP
        # Keep the new value inside [p - margin, p + margin]; min/max are
        # taken element-wise so the bounds are ordered correctly.
        _to_tensor = K.tensorflow_backend._to_tensor
        _clip_by_val = K.tf.clip_by_value
        margin = K.mean(K.abs(p * K.constant(self.UPCLIP)))
        min_value = _to_tensor(p - margin, p.dtype.base_dtype)
        max_value = _to_tensor(p + margin, p.dtype.base_dtype)
        max_v = K.maximum(min_value, max_value)
        min_v = K.minimum(min_value, max_value)
        new_p = _clip_by_val(new_p, min_v, max_v)
        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)
        # Optional hard clip of the parameter value itself.
        clptrkey = set_pattern_find(p.name, self.clips.keys())
        if self.clips_val and clptrkey:
            if self.verbose > 0:
                print("Clipping variable", p.name, " to ", self.clips[clptrkey])
            c = K.eval(self.clips[clptrkey])
            new_p = K.clip(new_p, c[0], c[1])
        self.updates.append(K.update(p, new_p))
    return self.updates
def _mask_loss(y_true, y_pred, y_mask, element_wise_loss):
    """Average an element-wise loss over the positions selected by y_mask.

    Positions where ``y_mask`` is false contribute zero. The sum is divided
    by the number of selected positions plus ``K.epsilon()``.
    """
    per_element = element_wise_loss(y_true, y_pred)
    zeros = K.zeros_like(y_mask, dtype=K.floatx())
    selected = K.switch(y_mask, per_element, zeros)
    selected_count = K.cast(K.sum(y_mask), dtype='float32')
    return K.sum(selected) / (selected_count + K.epsilon())
def call(self, x, mask=None):
    """Pool every region of interest to a ``pool_size`` x ``pool_size`` map.

    ``x`` is a pair ``[img, rois]``. Only the first batch entry of ``rois``
    is read (``rois[0, roi_idx, :]``), and each ROI is taken as
    ``(x, y, w, h)``. The result is reshaped to
    ``(1, num_rois, pool_size, pool_size, nb_channels)`` and, for
    'channels_first', permuted to put channels before the spatial axes.
    ``mask`` is accepted but not used.
    """
    assert (len(x) == 2)

    img = x[0]
    rois = x[1]

    input_shape = K.shape(img)

    outputs = []

    for roi_idx in range(self.num_rois):

        # Note: `x` is rebound here from the input pair to the ROI's x-coordinate.
        x = rois[0, roi_idx, 0]
        y = rois[0, roi_idx, 1]
        w = rois[0, roi_idx, 2]
        h = rois[0, roi_idx, 3]

        row_length = w / float(self.pool_size)
        col_length = h / float(self.pool_size)

        num_pool_regions = self.pool_size

        #NOTE: the RoiPooling implementation differs between theano and tensorflow due to the lack of a resize op
        # in theano. The theano implementation is much less efficient and leads to long compile times

        if self.dim_ordering == 'channels_first':
            # Explicit max-pool over a pool_size x pool_size grid of sub-windows.
            for jy in range(num_pool_regions):
                for ix in range(num_pool_regions):
                    x1 = x + ix * row_length
                    x2 = x1 + row_length
                    y1 = y + jy * col_length
                    y2 = y1 + col_length

                    x1 = K.cast(x1, 'int32')
                    x2 = K.cast(x2, 'int32')
                    y1 = K.cast(y1, 'int32')
                    y2 = K.cast(y2, 'int32')

                    # Make sure every sub-window is at least one pixel wide/high.
                    x2 = x1 + K.maximum(1, x2 - x1)
                    y2 = y1 + K.maximum(1, y2 - y1)

                    new_shape = [
                        input_shape[0], input_shape[1], y2 - y1, x2 - x1
                    ]

                    x_crop = img[:, :, y1:y2, x1:x2]
                    xm = K.reshape(x_crop, new_shape)
                    pooled_val = K.max(xm, axis=(2, 3))
                    outputs.append(pooled_val)

        elif self.dim_ordering == 'channels_last':
            # Crop the ROI and resize it to the pooled size instead of max-pooling.
            x = K.cast(x, 'int32')
            y = K.cast(y, 'int32')
            w = K.cast(w, 'int32')
            h = K.cast(h, 'int32')

            rs = tf.image.resize(img[:, y:y + h, x:x + w, :], (self.pool_size, self.pool_size))
            outputs.append(rs)

    final_output = K.concatenate(outputs, axis=0)
    final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))

    if self.dim_ordering == 'channels_first':
        final_output = K.permute_dimensions(final_output, (0, 1, 4, 2, 3))
    else:
        # Identity permutation: the axis order is left unchanged.
        final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4))

    return final_output
def call(self, inputs, mask=None):
    """Multiply the inputs by the mask (if any), then run the parent ``call``.

    The mask is cast to the backend float type and given a trailing axis so
    it broadcasts over the last dimension of ``inputs``.
    """
    parent = super(MaskedConv1D, self)
    if mask is None:
        return parent.call(inputs)

    float_mask = K.cast(mask, K.floatx())
    inputs *= K.expand_dims(float_mask, axis=-1)
    return parent.call(inputs)
def haraka_post_corrections(self, in_out):
    """
    Change any obviously wrong haraka marks according to the character and its context.

    Every rule builds a column mask with one value per row and blends it into
    ``pred_haraka``. Rules are applied in sequence, so later rules see the
    output of earlier ones. The rule constants are 8 wide, i.e. the haraka
    prediction is assumed to have 8 classes.

    NOTE(review): the masks compare each row's character with the characters
    of neighbouring rows (``char_index[1:]``, ``char_index[2:]``), so rows
    are presumably consecutive positions of one text -- confirm with caller.

    :param in_out: triple of (input layer output, haraka prediction, shadda
        prediction). The shadda prediction is unpacked but not used.
    :return: corrected haraka predictions; ``pred_haraka`` unchanged when
        ``self.rules_enabled`` is false.
    """
    inputs, pred_haraka, pred_shadda = in_out
    if not self.rules_enabled:
        return pred_haraka
    # Index of the character in the last position of every input row.
    char_index = K.argmax(inputs[:, -1], axis=-1)
    # Force the correct haraka on some letters
    # (mask == 1 keeps the prediction, mask == 0 replaces it by a one-hot)
    forced_diac_chars = {CHAR2INDEX['إ']: 3}
    for f_diac_char, f_diac in forced_diac_chars.items():
        mask = K.reshape(K.cast(K.not_equal(char_index, f_diac_char), 'float32'), (-1, 1))
        pred_haraka = mask * pred_haraka + (1 - mask) * K.one_hot(f_diac, K.int_shape(pred_haraka)[-1])
    # Force the correct haraka before some letters
    # (the mask is built from the *next* row's character; the last row has
    # no successor and is padded with 1 = keep)
    f_prev_diac_chars = {CHAR2INDEX['ى']: 1, CHAR2INDEX['ة']: 1}
    # Index of the character in the second-to-last position of every input row.
    prev_char_index = K.argmax(inputs[:, -2], axis=-1)
    for fd_char, f_diac in f_prev_diac_chars.items():
        mask = K.cast(K.not_equal(char_index[1:], fd_char), 'float32')
        mask = K.reshape(K.concatenate([mask, K.ones((1,))], axis=0), (-1, 1))
        pred_haraka = pred_haraka * mask + (1 - mask) * K.one_hot(f_diac, K.int_shape(pred_haraka)[-1])
    # Allow only Fatha, Fathatan, or nothing before ا if it is in the end of the word
    # (mask == 0 only when the next character is ا AND the one after it is a
    # space; in that case classes other than 0, 1 and 5 are zeroed)
    mask = K.reshape(K.concatenate([K.clip(
        K.cast(K.not_equal(char_index[1:-1], CHAR2INDEX['ا']), 'float32') +
        K.cast(K.not_equal(char_index[2:], CHAR2INDEX[' ']), 'float32'), 0, 1), K.ones((2,))], axis=0), (-1, 1))
    pred_haraka = mask * pred_haraka + (1 - mask) * K.constant([1, 1, 0, 0, 0, 1, 0, 0], shape=(1, 8)) * pred_haraka
    # Force Fatha before ا if it is not in the end of the word
    # (mask == 0 only when the next character is ا AND the one after it is
    # not a space; the prediction is then replaced by one-hot class 1)
    mask = K.reshape(K.concatenate([K.clip(
        K.cast(K.not_equal(char_index[1:-1], CHAR2INDEX['ا']), 'float32') +
        K.cast(K.equal(char_index[2:], CHAR2INDEX[' ']), 'float32'), 0, 1), K.ones((2,))], axis=0), (-1, 1))
    pred_haraka = mask * pred_haraka + (1 - mask) * K.one_hot(1, K.int_shape(pred_haraka)[-1])
    # Force no sukun and tanween at the beginning of the word
    # (the first row, and rows whose previous character is a space, keep only
    # classes 0-3)
    mask = K.reshape(
        K.concatenate([K.zeros((1,)), K.cast(K.not_equal(prev_char_index[1:], CHAR2INDEX[' ']), 'float32')], axis=0), (-1, 1))
    pred_haraka = mask * pred_haraka + (1 - mask) * K.constant([1, 1, 1, 1, 0, 0, 0, 0], shape=(1, 8)) * pred_haraka
    # Allow tanween only at the end of the word
    # (here mask == 1 applies the restriction: when the next character is not
    # a space, classes 5-7 are zeroed; the last row is never restricted)
    mask = K.reshape(K.concatenate([K.cast(K.not_equal(char_index[1:], CHAR2INDEX[' ']), 'float32'), K.zeros((1,))], axis=0), (-1, 1))
    pred_haraka = mask * K.constant([1, 1, 1, 1, 1, 0, 0, 0], shape=(1, 8)) * pred_haraka + (1 - mask) * pred_haraka
    # Prohibit Fathatan on most letters
    # (mask == 1 zeroes class 5; the mask is the product of three exemption
    # tests, so any exemption that evaluates to 0 leaves the prediction
    # untouched)
    mask = K.reshape(K.concatenate([K.clip(
        K.cast(K.not_equal(char_index[1:], CHAR2INDEX[' ']), 'float32') +
        K.cast(K.not_equal(char_index[:-1], CHAR2INDEX['ء']), 'float32'), 0, 1), K.ones((1,))], axis=0), (-1, 1))
    mask *= K.reshape(K.cast(K.not_equal(char_index, CHAR2INDEX['ة']), 'float32'), (-1, 1))
    mask *= K.reshape(K.concatenate([K.clip(
        K.cast(K.not_equal(char_index[1:-1], CHAR2INDEX['ا']), 'float32') +
        K.cast(K.not_equal(char_index[2:], CHAR2INDEX[' ']), 'float32'), 0, 1), K.ones((2,))], axis=0), (-1, 1))
    pred_haraka = mask * K.constant([1, 1, 1, 1, 1, 0, 1, 1], shape=(1, 8)) * pred_haraka + (1 - mask) * pred_haraka
    # Drop haraka from the forbidden characters
    # (mask == 0 for any forbidden character; the prediction is then replaced
    # by one-hot class 0)
    forbidden_chars = [CHAR2INDEX[' '], CHAR2INDEX['0'], CHAR2INDEX['آ'], CHAR2INDEX['ى'], CHAR2INDEX['ا']]
    mask = K.cast(K.not_equal(char_index, forbidden_chars[0]), 'float32')
    for forbidden_char in forbidden_chars[1:]:
        mask *= K.cast(K.not_equal(char_index, forbidden_char), 'float32')
    mask = K.reshape(mask, (-1, 1))
    pred_haraka = mask * pred_haraka + (1 - mask) * K.one_hot(0, K.int_shape(pred_haraka)[-1])
    return pred_haraka
}
# Map every layer name of the model to its layer object.
layer_dict = dict([(layer.name, layer) for layer in model.layers])

# The original code used K.variable, which appears to be backed by
# tf.Variable; after using assign_add-style functions the gradient could not
# be computed.
# Switching to tf.constant made it run. Mind the difference in meaning
# between constant, Variable and tensor: a constant usually defines an input,
# a variable usually defines a weight, and a tensor is an intermediate result.
# loss = tf.constant(0.)
# Replacing assign_add with tf.add also made it run: assign_add changes the
# value of the variable, whereas tf.add produces a tensor that represents an
# expression.
# It seems gradients only work on tensors; with a variable the result is [None].
loss = K.variable(0.)
for layer_name in layer_contributions:
    coeff = layer_contributions[layer_name]
    activation = layer_dict[layer_name].output

    # Number of elements in the activation, used to normalise the contribution.
    scaling = K.prod(K.cast(K.shape(activation), 'float32'))
    # Note: K.sum and K.prod correspond to tf.reduce_sum and tf.reduce_prod;
    # the other calls can be swapped one-to-one.
    # The 2-element border is cut off along axes 1 and 2 before summing.
    loss = tf.add(
        loss,
        coeff * K.sum(K.square(activation[:, 2:-2, 2:-2, :])) / scaling)
    # loss += (coeff * K.sum(K.square(activation[:, 2: -2, 2: -2, :])) / scaling)

dream = model.input
# This is the gradient with respect to the input.
grads = tf.gradients(loss, dream)[0]

# Normalise by the mean absolute gradient; the 1e-7 floor avoids division by zero.
grads /= tf.maximum(tf.reduce_mean(tf.abs(grads)), 1e-7)

outputs = [loss, grads]
# K.function is not the same thing as tf.function.
fetch_loss_and_grads = K.function([dream], outputs)
def get_PAD_mask(q, k):
    """Build a padding mask that is 0 wherever ``k`` equals 0.

    A column of ones shaped like ``q`` (with a trailing axis) is
    batch-multiplied with the row of non-zero flags of ``k``, so the flags of
    ``k`` are repeated once for every position of ``q``.

    Assumes ``q`` and ``k`` are (batch, length) id tensors with 0 as the
    padding id -- TODO confirm against the caller.

    :param q: query-side tensor; only its shape is used.
    :param k: key-side tensor; entries equal to 0 are masked out.
    :return: float32 mask tensor.
    """
    ones = K.expand_dims(K.ones_like(q, dtype='float32'), -1)
    # Fixed: the backend function is `not_equal`; the previous `K.not_qual`
    # does not exist and raised AttributeError on every call.
    mask = K.cast(K.expand_dims(K.not_equal(k, 0), 1), 'float32')
    mask = K.batch_dot(ones, mask, axes=[2, 1])
    return mask
def compute_mask(self, inputs, mask=None):
    """Combine the mask derived from ``inputs[1]`` with the first incoming mask.

    :param inputs: sequence whose second entry is cast to bool and used as
        this layer's own mask.
    :param mask: optional sequence of incoming masks; only ``mask[0]`` is
        used.
    :return: element-wise AND of both masks, or just the mask derived from
        ``inputs[1]`` when no incoming mask is available.
    """
    own_mask = K.cast(inputs[1], bool)
    # The default `mask=None` (and a `None` first entry) used to fail on
    # `mask[0]` / inside `K.all`; with nothing to combine, the layer's own
    # mask is the result.
    upstream_mask = mask[0] if mask is not None else None
    if upstream_mask is None:
        return own_mask
    mask_combine = K.all([own_mask, upstream_mask], axis=0)
    return mask_combine
def update_state(self, y_true, y_pred, sample_weight=None):
    """Reduce y_pred to its arg-max class, then delegate to the parent metric.

    The class index along the last axis is cast to the dtype of ``y_true``
    before being handed to the parent's ``update_state``.
    """
    predicted_labels = K.argmax(y_pred, axis=-1)
    predicted_labels = K.cast(predicted_labels, y_true.dtype)
    parent = super(PixelIoU, self)
    return parent.update_state(
        y_true=y_true,
        y_pred=predicted_labels,
        sample_weight=sample_weight)
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    args: sequence; the first ``num_layers`` entries are used as the network
        outputs (one per detection scale) and the remaining entries as the
        matching ground-truth tensors (``y_true``).
    anchors: indexable by a list of ints (``anchors[anchor_mask[l]]``);
        ``len(anchors) // 3`` is taken as the number of detection scales.
        Presumably an array of shape (N, 2) holding widths/heights -- confirm
        against the caller.
    num_classes: integer, forwarded to ``yolo_head``.
    ignore_thresh: float, IoU threshold; a prediction whose best IoU with any
        ground-truth box is >= this value gets no no-object confidence penalty.
    print_loss: if True, the running loss is wrapped in ``tf.Print`` after
        every scale so the individual terms are printed.

    Returns
    -------
    loss: sum over all scales of the xy, wh, confidence and class terms, each
        summed over all elements and divided by the batch size.

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    # NOTE(review): in the two-scale case anchor index 3 appears in both
    # groups and index 0 in neither. The mask used when y_true was built is
    # not visible here -- confirm both agree before changing either.
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    # The input size is derived from the first output's spatial dims times 32.
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        # Channel 4 of y_true is the objectness flag, channels 5+ the class targets.
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        # Weight in (1, 2]: boxes with a smaller w*h product get a larger weight.
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # Ground-truth boxes of sample b that actually contain an object.
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            # 1 where the prediction overlaps no ground truth well enough.
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        # No-object cells are penalised only where ignore_mask is 1.
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        # Every term is summed over all elements and divided by the batch size.
        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ], message='loss: ')
    return loss
def call(self, inputs):
    """Look up embeddings for the given ids and scale them by sqrt(model_dim).

    Inputs whose dtype is not int32 are cast to int32 before the lookup.
    """
    token_ids = inputs if K.dtype(inputs) == 'int32' else K.cast(inputs, 'int32')
    looked_up = K.gather(self.embeddings, token_ids)
    # Multiply by the square root of the model dimension.
    looked_up *= self._model_dim ** 0.5
    return looked_up
def calculate_accuracy(true_and_pred):
    """Average the probabilities predicted at the true start and end indices.

    ``true_and_pred`` is a pair ``(y_true, y_pred)``. ``y_true[0]`` and
    ``y_true[1]`` are cast to int32 and used to index ``y_pred[0]`` and
    ``y_pred[1]`` respectively.
    """
    y_true, y_pred = true_and_pred
    start_index = K.cast(y_true[0], dtype='int32')
    start_prob = y_pred[0][start_index]
    end_index = K.cast(y_true[1], dtype='int32')
    end_prob = y_pred[1][end_index]
    return (start_prob + end_prob) / 2.0