def fcn_upsample(small,
                 big,
                 ksize=[4, 4],
                 strides=[2, 2],
                 padding='SAME',
                 name=None,
                 outputs_collections=None):
    """
    Upsample block for FCN: fuse a low resolution map with a high resolution one.

    Steps performed below:
        1. a [1, 1] conv (no activation) maps ``big`` to the channel count of
           ``small`` so that channels match
        2. a transposed conv brings ``small`` to the shape of the projected
           ``big`` so that resolution matches
        3. both tensors are summed
    :param small: low resolution feature
    :param big: high resolution feature
    :param ksize: trans_conv kernel size
    :param strides: trans_conv kernel stride
    :param padding: trans_conv kernel padding mode
    :param name: variable scope name for this op (default scope: 'fcn_upsample')
    :param outputs_collections: collection the fused tensor is added to
    :return: the fused tensor, i.e. trans_conv(small) + conv1x1(big)
    """
    with tf.variable_scope(name, 'fcn_upsample'):
        # channel matching: project big to the depth of small
        small_channels = tensor_shape(small)[-1]
        big = conv2d(big,
                     small_channels,
                     ksize=[1, 1],
                     activate=None,
                     name='score_conv')

        # resolution matching: trans_conv small to big size
        target_shape = tensor_shape(big)
        upsampled = trans_conv2d(small,
                                 outc=target_shape[-1],
                                 ksize=ksize,
                                 output_shape=target_shape,
                                 strides=strides,
                                 padding=padding)

        fused = upsampled + big
        tf.add_to_collection(outputs_collections, fused)
        return fused
def draw_bbox(image, bboxes):
    """
    Draw bounding boxes on an image or on a batch of images.

    ``tf.image.draw_bounding_boxes`` expects a rank-4 image batch and rank-3
    boxes, so unbatched inputs get a leading batch axis first.
    :param image: [H, W, C] or [N, H, W, C]
    :param bboxes: [num_boxes, 4] or [N, num_boxes, 4]
    :return: images with the boxes drawn, always batched ([N, H, W, C])
    """
    # tensor_shape returns the shape as a list, so the rank is its length
    # (comparing the list itself with an int is always False).
    if len(tensor_shape(image)) == 3:
        image = tf.expand_dims(image, axis=0)
    if len(tensor_shape(bboxes)) == 2:
        bboxes = tf.expand_dims(bboxes, axis=0)
    return tf.image.draw_bounding_boxes(image, bboxes)
def _yolo_detection_loss(locations,
                         scores,
                         encode_locations,
                         encode_labels,
                         encode_ious,
                         pos_th,
                         background_label=0,
                         alpha=[1.0, 5.0, 1.0, 1.0]):
    """
    Calculate the YOLO style loss for one layer.

    encode_labels corresponds to the GT box with highest iou, but this iou can
    be low, so an anchor only counts as positive when its label is not the
    background label AND its iou exceeds pos_th.

    Every loss term is also added to tf.GraphKeys.LOSSES; the number of
    positives goes to the 'positive_nums' collection and the objectness
    logits to the 'objectness' collection.
    :param locations: predicted locations [N, H, W, K, 4 ]
    :param scores: predicted scores [N, H, W, K, 21]
    :param encode_locations: [N, H, W, K, 4]
    :param encode_labels: [N, H, W, K]
    :param encode_ious: [N, H, W, K]
    :param pos_th: iou threshold above which an anchor is positive
    :param background_label: label id of the background class; the score
        channel with this index is used as the objectness logit
    :param alpha: weights for [classes, is-object, non-object, bbox] losses
    :return: classes_loss, is_obj_losses, non_obj_losses, bbox_loss
    """
    # tf.not_equal builds an element-wise op; the Python `!=` operator is not
    # overloaded element-wise for tensors in TF 1.x graph code.
    positive_mask = tf.logical_and(
        tf.not_equal(encode_labels, background_label),
        encode_ious > pos_th)
    positive_mask = tf.cast(positive_mask, tf.float32)
    tf.add_to_collection('positive_nums', tf.reduce_sum(positive_mask))

    num_classes = tensor_shape(scores)[-1]
    batch_size = tensor_shape(locations)[0]

    with tf.name_scope('classes_loss'):
        one_hot_labels = tf.cast(tf.one_hot(encode_labels, depth=num_classes),
                                 dtype=tf.float32)
        classes_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=one_hot_labels, logits=scores)
        # channel 0 is dropped here; it is scored by the objectness term
        classes_loss = classes_loss[..., 1:]
        classes_loss = tf.reduce_sum(classes_loss, axis=-1)
        classes_loss = alpha[0] * tf.reduce_sum(
            classes_loss * positive_mask) / batch_size
        tf.add_to_collection(tf.GraphKeys.LOSSES, classes_loss)

    with tf.name_scope('objectness_loss'):
        # use negative background as objectness
        object_scores = scores[..., background_label]
        tf.add_to_collection('objectness', object_scores)
        object_losses = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=positive_mask, logits=object_scores)
        is_obj_losses = alpha[1] * tf.reduce_sum(
            object_losses * positive_mask) / batch_size
        non_obj_losses = alpha[2] * tf.reduce_sum(
            object_losses * (1.0 - positive_mask)) / batch_size
        tf.add_to_collection(tf.GraphKeys.LOSSES, is_obj_losses)
        tf.add_to_collection(tf.GraphKeys.LOSSES, non_obj_losses)

    # bbox regression loss: squared error, positives only
    bbox_loss = tf.square(locations - encode_locations)
    bbox_loss = tf.reduce_sum(bbox_loss, axis=-1)
    bbox_loss *= positive_mask
    bbox_loss = alpha[3] * tf.reduce_sum(bbox_loss) / batch_size
    tf.add_to_collection(tf.GraphKeys.LOSSES, bbox_loss)

    return classes_loss, is_obj_losses, non_obj_losses, bbox_loss
def mAP(tensor1, tensor2):
    """
    Fraction of positions at which the two tensors hold equal values.

    Both tensors must have the same shape and dtype (checked with asserts).
    :param tensor1: first tensor
    :param tensor2: second tensor, same shape and dtype as tensor1
    :return: scalar float32 tensor named 'mAP' (mean of the equality mask)
    """
    shapes = (tensor_shape(tensor1), tensor_shape(tensor2))
    dtypes = (tensor1.dtype, tensor2.dtype)
    assert shapes[0] == shapes[1]
    assert dtypes[0] == dtypes[1]

    hits = tf.equal(tensor1, tensor2)
    return tf.reduce_mean(tf.cast(hits, tf.float32), name='mAP')
def layers_loss_new(prediction_gathers,
                    encoding_gathers,
                    pos_th=0.5,
                    neg_th=0.3,
                    neg_ratio=3,
                    alpha=[1.0, 1.0, 1.0],
                    HNM=False):
    """
    Compute a single loss over the anchors of all layers at once.

    The per-layer tensors are flattened, concatenated along axis 0 and handed
    to _layer_loss in one call.
    :param prediction_gathers: (per-layer locations, per-layer scores)
    :param encoding_gathers: (per-layer truth locations, labels, ious)
    :param pos_th: forwarded to _layer_loss
    :param neg_th: forwarded to _layer_loss
    :param neg_ratio: forwarded to _layer_loss
    :param alpha: forwarded to _layer_loss
    :param HNM: forwarded to _layer_loss
    :return: ([pos_loss], [neg_loss], [bbox_loss]), each a one element list
    """
    layer_pred_locations, layer_pred_scores = prediction_gathers
    layer_truth_locations, layer_truth_labels, layer_truth_ious = encoding_gathers

    # batch size and class count are read from the first layer's scores
    first_scores = layer_pred_scores[0]
    batch_size = tensor_shape(first_scores)[0]
    num_classes = tensor_shape(first_scores)[-1]

    flat_pred_locations = []
    flat_pred_scores = []
    flat_truth_locations = []
    flat_truth_labels = []
    flat_truth_ious = []
    for layer, pred_locations in enumerate(layer_pred_locations):
        flat_pred_locations.append(tf.reshape(pred_locations, shape=[-1, 4]))
        flat_pred_scores.append(
            tf.reshape(layer_pred_scores[layer], shape=[-1, num_classes]))
        flat_truth_locations.append(
            tf.reshape(layer_truth_locations[layer], shape=[-1, 4]))
        flat_truth_labels.append(
            tf.reshape(layer_truth_labels[layer], shape=[-1]))
        flat_truth_ious.append(
            tf.reshape(layer_truth_ious[layer], shape=[-1]))

    all_pred_locations = tf.concat(flat_pred_locations, axis=0)
    all_pred_scores = tf.concat(flat_pred_scores, axis=0)
    all_truth_locations = tf.concat(flat_truth_locations, axis=0)
    all_truth_labels = tf.concat(flat_truth_labels, axis=0)
    all_truth_ious = tf.concat(flat_truth_ious, axis=0)

    pos_loss, neg_loss, bbox_loss = _layer_loss(
        locations=all_pred_locations,
        scores=all_pred_scores,
        encode_locations=all_truth_locations,
        encode_labels=all_truth_labels,
        encode_ious=all_truth_ious,
        pos_th=pos_th,
        neg_th=neg_th,
        neg_ratio=neg_ratio,
        batch_size=batch_size,
        alpha=alpha,
        HNM=HNM)
    return [pos_loss], [neg_loss], [bbox_loss]
def compare(predictions, labels):
    """
    Paint the positions where predictions disagree with labels.

    A position counts as matching when prediction equals label or when the
    label is 255. Mismatches are painted [255, 0, 0], matches [0, 0, 0].
    :param predictions: prediction map; resized (nearest neighbor) to the
        spatial size of labels when the shapes differ
    :param labels: label map
    :return: painted tensor in the dtype of predictions
    """
    if tensor_shape(predictions) != tensor_shape(labels):
        h, w = tensor_shape(labels)[1:3]
        predictions = tf.image.resize_nearest_neighbor(predictions, [h, w],
                                                       align_corners=True)

    is_correct = tf.equal(predictions, labels)
    is_ignored = tf.equal(labels, 255)
    match_index = tf.cast(tf.logical_or(is_correct, is_ignored), tf.int32)

    # index 0 (mismatch) selects the first palette row, index 1 the second
    selector = tf.one_hot(match_index,
                          depth=2,
                          axis=-1,
                          dtype=predictions.dtype)
    palette = tf.cast([[255, 0, 0], [0, 0, 0]], predictions.dtype)
    painted = tf.tensordot(selector, palette, axes=[[-1], [0]])
    return tf.squeeze(painted, axis=3)
def from_sem_to_boundary(anno, nrange=3):
    """
    Mark the pixels whose label differs from a nearby pixel's label.

    For every radius r in 1 .. nrange - 1 the annotation is compared with its
    8 copies shifted by -r / 0 / +r along height and width (borders use
    SYMMETRIC padding). A pixel is a boundary when any comparison differs.
    :param anno: annotation; a rank 3 input gets a leading axis, afterwards
        axes 1 and 2 are treated as height and width
    :param nrange: exclusive upper bound of the shift radius
    :return: bool tensor shaped like the (rank 4) annotation
    """
    if tensor_rank(anno) == 3:
        anno = anno[tf.newaxis, ...]
    H, W = tensor_shape(anno)[1:3]

    anno = tf.cast(anno, tf.int32)
    is_bound = tf.zeros_like(anno, dtype=tf.bool)
    # Radius 0 and the un-shifted (0, 0) copy compare anno with itself and can
    # never mark a boundary, so they are skipped; the result is unchanged.
    for r in range(1, nrange):
        pad_anno = tf.pad(anno, [[0, 0], [r, r], [r, r], [0, 0]],
                          mode="SYMMETRIC")
        for ridx in (-r, 0, r):
            for cidx in (-r, 0, r):
                if ridx == 0 and cidx == 0:
                    continue
                shifted = pad_anno[:, (r + ridx):(H + r + ridx),
                                   (r + cidx):(W + r + cidx)]
                is_bound = tf.logical_or(is_bound,
                                         tf.not_equal(shifted, anno))
    return is_bound
def fcn_32(inputs,
           num_classes=21,
           weight_init=None,
           weight_reg=None,
           bias_init=tf.zeros_initializer,
           bias_reg=None,
           device='cpu'):
    """
    FCN-32s: VGG-16 score map upsampled to input size in a single step.

    :param inputs: input images; the output takes their shape with the last
        dimension replaced by num_classes
    :param num_classes: number of output classes
    :param weight_init: forwarded to fcn_arg_scope
    :param weight_reg: forwarded to fcn_arg_scope
    :param bias_init: forwarded to fcn_arg_scope
    :param bias_reg: forwarded to fcn_arg_scope
    :param device: forwarded to fcn_arg_scope
    :return: (full resolution score map, end_points dict)
    """
    input_shape = tensor_shape(inputs)

    with arg_scope(vgg_arg_scope()):
        fcn32, end_points = vgg_16(inputs,
                                   num_classes=num_classes,
                                   spatial_squeeze=False,
                                   fc_conv_padding='SAME')

    with tf.name_scope('upscale') as ns:
        end_points_collection = ns + '_end_points'
        upscale_args = fcn_arg_scope(weight_init, weight_reg, bias_init,
                                     bias_reg, device, end_points_collection)
        with arg_scope(upscale_args):
            # one transposed conv: stride 32, kernel 64
            fcn1 = trans_conv2d(fcn32,
                                outc=num_classes,
                                ksize=[64, 64],
                                strides=[32, 32],
                                output_shape=input_shape[:-1] + [num_classes],
                                name='to_1')

        collected = tf.get_collection(end_points_collection)
        print(collected)
        end_points.update({ep.name: ep for ep in collected})
        end_points[ns + '_to_1'] = fcn1

    return fcn1, end_points
def _layer_prediction(feature_map,
                      num_anchors,
                      conv_params,
                      num_classes,
                      scope=None):
    """
    Predict, for each location in feature map, 4 * num_anchors box offsets
    and num_anchors * num_classes scores.

    :param feature_map: [None, H, W, C]
    :param num_anchors: anchors per location
    :param conv_params: extra keyword arguments applied to both convs
    :param num_classes: scores per anchor
    :param scope: variable scope name (default scope: 'feature2bbox')
    :return: locations with shape [None, H, W, num_anchors, 4]
             scores with shape [None, H, W, num_anchors, num_classes]
    """
    with tf.variable_scope(scope, 'feature2bbox'):
        # TODO : CHECK ACTIVATION FUNC HERE
        # both heads are plain 3x3 convs: no activation, no normalizer
        with slim.arg_scope([conv2d],
                            activation_fn=None,
                            normalizer_fn=None,
                            **conv_params):
            raw_locations = conv2d(feature_map,
                                   kernel_size=3,
                                   num_outputs=num_anchors * 4,
                                   scope='conv_loc')
            raw_scores = conv2d(feature_map,
                                kernel_size=3,
                                num_outputs=num_anchors * num_classes,
                                scope='conv_obj')

        # split the channel axis into [num_anchors, values per anchor]
        spatial_dims = (tensor_shape(raw_locations))[1:-1]
        locations_shape = [-1] + spatial_dims + [num_anchors, 4]
        scores_shape = [-1] + spatial_dims + [num_anchors, num_classes]
        locations = tf.reshape(raw_locations, shape=locations_shape)
        scores = tf.reshape(raw_scores, shape=scores_shape)
        return locations, scores
def soft_nms(scores, bboxes, max_output_size, sigma=0.5):
    """
    Soft non-maximum suppression with a gaussian score decay.

    Runs min(#boxes, max_output_size) rounds. Each round picks the highest
    scoring box that is not selected yet, then multiplies the scores of all
    unselected boxes by exp(-overlap ** 2 / sigma), where the overlap with
    the picked box comes from the project helper ``iou``.
    :param scores: box scores, first dimension is the number of boxes
    :param bboxes: boxes, indexed the same way as scores
    :param max_output_size: maximum number of boxes to return
    :param sigma: decay denominator
    :return: (scores of the selected boxes in top_k order, matching boxes)
    """
    num_boxes = tensor_shape(scores)[0]
    num_rounds = min(num_boxes, max_output_size)

    def _decay(score, overlap, sigma=1.0):
        return score * tf.exp(-overlap**2 / sigma)

    def _not_done(step, cur_scores, picked):
        return tf.less(step, num_rounds)

    def _pick_and_decay(step, cur_scores, picked):
        # selected entries are zeroed so argmax only sees the rest
        best = tf.argmax(cur_scores * (1 - picked))
        # mark best as selected
        picked = picked + tf.cast(tf.one_hot(best, num_boxes), tf.float32)
        overlaps = iou(bboxes, tf.gather(bboxes, best))
        decayed = _decay(cur_scores, overlaps, sigma=sigma)
        # selected boxes keep their score, the others get the decayed one
        cur_scores = picked * cur_scores + (1 - picked) * decayed
        return [step + 1, cur_scores, picked]

    nothing_picked = tf.zeros(shape=[num_boxes], dtype=tf.float32)
    _, scores, is_select = tf.while_loop(
        cond=_not_done,
        body=_pick_and_decay,
        loop_vars=[0, scores, nothing_picked])

    # [?,]
    selected = tf.squeeze(tf.where(is_select > 0), axis=-1)
    sorted_scores, order = tf.nn.top_k(tf.gather(scores, selected),
                                       k=num_rounds)
    sorted_bbox = tf.gather(bboxes, tf.gather(selected, order))
    return sorted_scores, sorted_bbox
def multi_scale_loss(logits_pyramids,
                     labels,
                     loss_func,
                     resize_labels=False,
                     loss_func_args=None):
    """
    Evaluate loss_func once per pyramid level.

    :param logits_pyramids: iterable of rank 4 logits tensors
    :param labels: rank 4 labels tensor
    :param loss_func: called as loss_func(logits, labels, **loss_func_args)
    :param resize_labels: if True, labels are resized (nearest neighbor) to
        each level's size; otherwise each level's logits are resized
        (bilinear) to the label size
    :param loss_func_args: optional dict of extra keyword arguments for
        loss_func; None means no extra arguments
    :return: list with one loss per pyramid level
    """
    # `**None` raises TypeError, so the default has to become an empty dict
    if loss_func_args is None:
        loss_func_args = {}

    _, label_h, label_w, _ = tensor_shape(labels)
    loss_pyramids = []
    for logits in logits_pyramids:
        _, h, w, _ = tensor_shape(logits)
        if resize_labels:
            resized_labels = tf.image.resize_nearest_neighbor(
                labels, size=[h, w], align_corners=True)
            loss = loss_func(logits, resized_labels, **loss_func_args)
        else:
            resized_logits = tf.image.resize_bilinear(
                logits, size=[label_h, label_w], align_corners=True)
            loss = loss_func(resized_logits, labels, **loss_func_args)
        loss_pyramids.append(loss)
    return loss_pyramids
def fcn_8(inputs,
          num_classes=21,
          weight_init=None,
          weight_reg=None,
          bias_init=tf.zeros_initializer,
          bias_reg=None,
          device='cpu'):
    """
    FCN-8s: VGG-16 scores fused with pool4 and pool3, then upsampled x8.

    :param inputs: input images; the output takes their shape with the last
        dimension replaced by num_classes
    :param num_classes: number of output classes
    :param weight_init: forwarded to vgg_arg_scope and fcn_arg_scope
    :param weight_reg: forwarded to vgg_arg_scope and fcn_arg_scope
    :param bias_init: forwarded to vgg_arg_scope and fcn_arg_scope
    :param bias_reg: forwarded to vgg_arg_scope and fcn_arg_scope
    :param device: forwarded to vgg_arg_scope and fcn_arg_scope
    :return: (full resolution score map, end_points dict)
    """
    input_shape = tensor_shape(inputs)

    backbone_args = vgg_arg_scope(weight_init,
                                  weight_reg,
                                  bias_init,
                                  bias_reg,
                                  device=device)
    with arg_scope(backbone_args):
        fcn32, end_points = vgg_16(inputs,
                                   num_classes=num_classes,
                                   spatial_squeeze=False,
                                   fc_conv_padding='SAME')

    with tf.name_scope('upscale') as ns:
        end_points_collection = ns + '_end_points'
        upscale_args = fcn_arg_scope(weight_init, weight_reg, bias_init,
                                     bias_reg, device, end_points_collection)
        with arg_scope(upscale_args):
            # fuse the score map with pool4, then with pool3
            fcn16 = fcn_upsample(fcn32,
                                 end_points['vgg_16/pool4:0'],
                                 ksize=[4, 4],
                                 name='to_16')
            fcn8 = fcn_upsample(fcn16,
                                end_points['vgg_16/pool3:0'],
                                ksize=[4, 4],
                                name='to_8')
            # final transposed conv: stride 8, kernel 16
            fcn1 = trans_conv2d(fcn8,
                                outc=num_classes,
                                ksize=[16, 16],
                                strides=[8, 8],
                                output_shape=input_shape[:-1] + [num_classes],
                                name='trans_conv/to_1')

        collected = tf.get_collection(end_points_collection)
        end_points.update({ep.name: ep for ep in collected})
        for suffix, tensor in (('_to_32', fcn32), ('_to_16', fcn16),
                               ('_to_8', fcn8), ('_to_1', fcn1)):
            end_points[ns + suffix] = tensor

    return fcn1, end_points
def gaussian_blur(img, kernel_size=5, sigma=1.0):
    """
    Blur an image (or a batch) with a square gaussian kernel, one channel at
    a time.

    :param img: [H, W, C] or [N, H, W, C]
    :param kernel_size: kernel width; a list or tuple is treated as a set of
        candidates, one of which is drawn with tf.random_shuffle and added to
        the 'kernel_size' collection
    :param sigma: gaussian standard deviation; None derives it from the
        kernel size as 0.3 * ((kernel_size - 1) * 0.5 - 1) + 0.8
    :return: blurred tensor with the same rank as img
    """
    if isinstance(kernel_size, (list, tuple)):
        kernel_size = tf.random_shuffle(kernel_size)[0]
        tf.add_to_collection('kernel_size', kernel_size)

    # work on a batch; remember to drop the batch axis again at the end
    squeeze = tensor_rank(img) == 3
    if squeeze:
        img = tf.expand_dims(img, axis=0)

    # generate gaussian kernel
    g_r = tf.range(kernel_size)
    kernel_size = tf.cast(kernel_size, tf.float32)
    g_r = tf.cast(g_r, tf.float32)
    if sigma is None:
        # https://docs.opencv.org/3.1.0/d4/d86/group__imgproc__filter.html#gac05a120c1ae92a6060dd0db190a61afa
        sigma = 0.3 * ((kernel_size - 1) * 0.5 - 1) + 0.8
    g_r = tf.exp(-1.0 * (g_r - 0.5 * (kernel_size - 1))**2 / (2.0 * sigma**2))
    g_r = g_r / tf.reduce_sum(g_r)
    # outer product of the 1D kernel, then [k, k] -> [k, k, 1, 1] so it can
    # be used as a single channel conv filter
    g_2d = g_r[tf.newaxis, ...] * g_r[..., tf.newaxis]
    g_2d = g_2d[..., tf.newaxis, tf.newaxis]
    kernel_size = tf.cast(kernel_size, tf.int32)

    def _blur_channel(x):
        # same_padding is a project helper; presumably it pads x so that the
        # VALID conv keeps the spatial size - TODO confirm
        return tf.nn.conv2d(
            same_padding(x, [kernel_size, kernel_size], [1, 1]),
            filter=g_2d,
            strides=[1, 1, 1, 1],
            padding='VALID',
        )

    num_channels = tensor_shape(img)[-1]
    blurs = [
        _blur_channel(img[..., i][..., tf.newaxis])
        for i in range(num_channels)
    ]
    blur = tf.concat(blurs, axis=-1)
    if squeeze:
        blur = tf.squeeze(blur, axis=0)
    return blur
def get_image_pyramids(images,
                       scales,
                       method=tf.image.ResizeMethod.BILINEAR,
                       align_corners=True):
    """
    Resize a batch of images to several scales.

    :param images: rank 4 image tensor; dimensions 1 and 2 are height/width
    :param scales: iterable of scale factors, processed in ascending order
    :param method: tf.image.ResizeMethod used for resizing
    :param align_corners: forwarded to tf.image.resize_images
    :return: list of resized tensors, one per scale (ascending scale order);
        each side is ceil(scale * original side)
    """
    _, h, w, _ = tensor_shape(images)
    image_pyramids = []
    for scale in sorted(scales):
        scale_h, scale_w = int(ceil(scale * h)), int(ceil(scale * w))
        # honor the caller's resize method instead of hard-coding BILINEAR
        resize_image = tf.image.resize_images(images,
                                              size=[scale_h, scale_w],
                                              method=method,
                                              align_corners=align_corners)
        image_pyramids.append(resize_image)
    return image_pyramids
def layers_anchors(end_points):
    """
    Gather anchors from every layer listed in default_params.feat_layers.

    :param end_points: dict mapping layer names to feature tensors
    :return: (ys, xs, hs, ws), four lists with one entry per feature layer
    """
    params = default_params
    ys, xs, hs, ws = [], [], [], []
    for layer_idx, layer_name in enumerate(params.feat_layers):
        # spatial dims of the feature map (everything but batch and channels)
        feature_dims = tensor_shape(end_points[layer_name])[1:-1]
        y, x, h, w = _layer_anchors(feature_dims,
                                    params.feat_steps[layer_idx],
                                    params.anchor_scales[layer_idx],
                                    params.anchor_scales[layer_idx + 1],
                                    params.anchor_ratios[layer_idx])
        for bucket, value in zip((ys, xs, hs, ws), (y, x, h, w)):
            bucket.append(value)
    return ys, xs, hs, ws
def fcn_16(inputs,
           num_classes=21,
           weight_init=None,
           weight_reg=None,
           bias_init=tf.zeros_initializer,
           bias_reg=None):
    """
    FCN-16s: VGG-16 scores fused with pool4, then upsampled x16.

    :param inputs: input images; the output takes their shape with the last
        dimension replaced by num_classes
    :param num_classes: number of output classes
    :param weight_init: forwarded to fcn_arg_scope
    :param weight_reg: forwarded to fcn_arg_scope
    :param bias_init: forwarded to fcn_arg_scope
    :param bias_reg: forwarded to fcn_arg_scope
    :return: (full resolution score map, end_points dict)
    """
    input_shape = tensor_shape(inputs)

    with arg_scope(vgg_arg_scope()):
        fcn32, end_points = vgg_16(inputs,
                                   num_classes=num_classes,
                                   spatial_squeeze=False,
                                   fc_conv_padding='SAME')

    # whatever precedes 'vgg_16' in the first end point name is the prefix
    # shared by the keys looked up below
    first_key = list(end_points.keys())[0]
    prefix_name = first_key[:search('vgg_16', first_key).span()[0]]

    with tf.name_scope('upscale') as ns:
        end_points_collection = ns + '_end_points'
        # NOTE(review): fcn_32 / fcn_8 pass a `device` argument before the
        # collection name; here the collection name is the fifth positional
        # argument - confirm against fcn_arg_scope's signature.
        upscale_args = fcn_arg_scope(weight_init, weight_reg, bias_init,
                                     bias_reg, end_points_collection)
        with arg_scope(upscale_args):
            # fuse the score map with pool4
            pool4 = end_points[prefix_name + 'vgg_16/pool4:0']
            fcn16 = fcn_upsample(fcn32, pool4, ksize=[4, 4], name='to_16')
            # final transposed conv: stride 16, kernel 32
            fcn1 = trans_conv2d(fcn16,
                                outc=num_classes,
                                ksize=[32, 32],
                                strides=[16, 16],
                                output_shape=input_shape[:-1] + [num_classes],
                                name='to_1')

        collected = tf.get_collection(end_points_collection)
        print(collected)
        end_points.update({ep.name: ep for ep in collected})
        for suffix, tensor in (('_to_32', fcn32), ('_to_16', fcn16),
                               ('_to_1', fcn1)):
            end_points[ns + suffix] = tensor

    return fcn1, end_points
def gaussian_edge(input,
                  kernel=(3, 3),
                  sigma=None,
                  nearest=3,
                  dtype=tf.float32):
    """
    For each point in input, if input is larger than 0, assign a gaussian
    distribution around the point. For multiple gaussian, take their maximum!

    The work is done by the custom op ``gaussian_edge_op``; this wrapper only
    normalizes the input rank and fills in the default sigma.
    :param input: int32 tensor, [N, H, W] or [N, H, W, 1]
    :param kernel: [kh, kw]
    :param sigma: [sh, sw]; None derives each entry from the matching kernel
        size as 0.3 * ((ksize - 1) * 0.5 - 1) + 0.8
    :param nearest: forwarded unchanged to gaussian_edge_op
    :param dtype: output type
    :return: edge map; a trailing axis of size 1 is restored if the input
        had one
    """
    # `==` rather than `is`: dtype equality must not rely on object identity
    assert input.dtype == tf.int32

    add_tail_axis = False
    if tensor_rank(input) == 4:
        if tensor_shape(input)[-1] == 1:
            input = input[..., 0]
            add_tail_axis = True

    # sigma = 0.3 * ((ksize - 1) * 0.5 - 1) + 0.8
    if sigma is None:
        sigma = [0.3 * ((ksize - 1) * 0.5 - 1) + 0.8 for ksize in kernel[:2]]

    edge = gaussian_edge_op(x=input,
                            T=dtype,
                            kernel=kernel,
                            sigma=sigma,
                            nearest=nearest)
    if add_tail_axis:
        edge = edge[..., tf.newaxis]
    return edge
def _layer_prediction(feature_map,
                      num_anchors,
                      num_classes,
                      l2_norm=False,
                      name=None):
    """
    Predict, for each location in feature map, 4 * num_anchors box offsets
    and num_anchors * num_classes scores (batch size 1 version).

    :param feature_map: [None, H, W, C]
    :param num_anchors: anchors per location
    :param num_classes: scores per anchor
    :param l2_norm: apply l2_norm_1D (with scale) to the feature map first
    :param name: variable scope name (default scope: 'feature2bbox')
    :return: locations and scores reshaped to [-1, H, W, num_anchors, 4] and
             [-1, H, W, num_anchors, num_classes], then squeezed on axis 0
    """
    with tf.variable_scope(name, 'feature2bbox'):
        if l2_norm:
            feature_map = l2_norm_1D(feature_map, scale=True)

        raw_locations = conv2d(feature_map,
                               outc=4 * num_anchors,
                               ksize=[3, 3],
                               activate=None,
                               name='conv_loc')
        raw_scores = conv2d(feature_map,
                            outc=num_anchors * num_classes,
                            ksize=[3, 3],
                            activate=None,
                            name='conv_cls')

        # split the channel axis into [num_anchors, values per anchor]
        spatial_dims = (tensor_shape(feature_map))[1:-1]
        locations = tf.reshape(raw_locations,
                               shape=[-1] + spatial_dims + [num_anchors, 4])
        scores = tf.reshape(raw_scores,
                            shape=[-1] + spatial_dims +
                            [num_anchors, num_classes])

        # batch size = 1 version
        locations = tf.squeeze(locations, axis=0)
        scores = tf.squeeze(scores, axis=0)
        return locations, scores
def locate_boundary(labels):
    """
    locate boundaries in labels
    todo: test this function

    NOTE(review): the neighbour comparisons below use tf.equal, so they are
    True where a pixel matches its neighbour, and the last step ORs in every
    non-zero label of the 3x3 neighbourhood. This looks inverted for a
    boundary detector - confirm the intent before relying on it.
    :param labels: [N, H, W, C]
    :return: a bool tensor, true indicating boundaries
    """
    H, W = tensor_shape(labels)[1:3]

    # labels shifted by one along axis 1 (last row reflected)
    shifted_rows = tf.pad(labels, [[0, 0], [0, 1], [0, 0], [0, 0]],
                          mode='REFLECT')[:, 1:, :, :]
    boundary = tf.equal(shifted_rows, labels)

    # labels shifted by one along axis 2 (last column reflected)
    shifted_cols = tf.pad(labels, [[0, 0], [0, 0], [0, 1], [0, 0]],
                          mode='REFLECT')[:, :, 1:, :]
    boundary = tf.logical_or(boundary, tf.equal(shifted_cols, labels))

    # non-zero mask of the labels, padded by one pixel on every side
    padded_mask = tf.cast(
        tf.pad(labels, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='REFLECT'),
        tf.bool)
    # the nine windows start at offsets 0, 1, 2 (= 1 + move, move in -1..1)
    for row_start in range(3):
        for col_start in range(3):
            window = padded_mask[:, row_start:row_start + H,
                                 col_start:col_start + W, :]
            boundary = tf.logical_or(boundary, window)
    return boundary
def _get_logits(images,
                model_options,
                outputs_to_num_classes,
                weight_decay=0.0001,
                reuse=tf.AUTO_REUSE,
                is_training=False,
                fine_tune_batch_norm=False):
    """Gets the logits by atrous/image spatial pyramid pooling.

    Besides one logits tensor per requested output type, the returned map
    always contains an extra 'detection' entry built by _get_detection from
    the backbone end points.

    Args:
      images: A tensor of size [batch, height, width, channels].
      model_options: A ModelOptions instance to configure models.
      outputs_to_num_classes: A map from output type to its number of
        classes; iterated in sorted key order.
      weight_decay: The weight decay for model variables.
      reuse: Reuse the model variables or not.
      is_training: Is training or not.
      fine_tune_batch_norm: Fine-tune the batch norm parameters or not.

    Returns:
      outputs_to_logits: A map from output_type to logits.
    """
    features, end_points = _extract_features(
        images,
        model_options,
        weight_decay=weight_decay,
        reuse=reuse,
        is_training=is_training,
        fine_tune_batch_norm=fine_tune_batch_norm)
    # Debug output: the ASPP features and every backbone end point.
    print('ASPP FEATRUES', features)
    for i in end_points.keys():
        print(end_points[i])
    DEBUG_VARS.aspp_result = features

    # Optional decoder: refine the features at the decoder output stride.
    if model_options.decoder_output_stride is not None:
        _, image_h, image_w, _ = tensor_shape(images)
        decoder_height = scale_dimension(
            image_h, 1.0 / model_options.decoder_output_stride)
        decoder_width = scale_dimension(
            image_w, 1.0 / model_options.decoder_output_stride)
        features = refine_by_decoder(
            features,
            end_points,
            decoder_height=decoder_height,
            decoder_width=decoder_width,
            decoder_use_separable_conv=model_options.decoder_use_separable_conv,
            model_variant=model_options.model_variant,
            weight_decay=weight_decay,
            reuse=reuse,
            is_training=is_training,
            fine_tune_batch_norm=fine_tune_batch_norm)

    outputs_to_logits = {}
    for output in sorted(outputs_to_num_classes):
        outputs_to_logits[output] = _get_branch_logits(
            features,
            outputs_to_num_classes[output],
            model_options.atrous_rates,
            aspp_with_batch_norm=model_options.aspp_with_batch_norm,
            kernel_size=model_options.logits_kernel_size,
            weight_decay=weight_decay,
            reuse=reuse,
            scope_suffix=output)

    # NOTE(review): `output` is the loop variable left over from the loop
    # above, so the detection head takes the class count of the last output
    # type in sorted order, and an empty outputs_to_num_classes raises
    # NameError here - confirm this is intended.
    outputs_to_logits['detection'] = _get_detection(
        end_points,
        num_classes=outputs_to_num_classes[output],
        weight_decay=weight_decay,
        reuse=reuse,
        scope_suffix='scale')

    return outputs_to_logits
def _layer_loss(locations,
                scores,
                encode_locations,
                encode_labels,
                encode_ious,
                pos_th,
                neg_th,
                neg_ratio,
                background_label=0,
                alpha=[1.0, 1.0, 1.0],
                HNM=False,
                batch_size=None):
    """
    Calculate loss for one layer, encode_labels corresponds to the GT box with highest iou, but this iou can be less than
    neg_th! so need to process and create new labels !

    Three terms are returned and each is also added to tf.GraphKeys.LOSSES:
    softmax cross entropy on positives, softmax cross entropy on negatives
    (optionally restricted by hard negative mining) and smooth-L1 box
    regression on positives. Every term is divided by batch_size.
    :param locations: predicted locations [N, H, W, K, 4 ]
    :param scores: predicted scores [N, H, W, K, 21]
    :param encode_locations: [N, H, W, K, 4]
    :param encode_labels: [N, H, W, K]
    :param encode_ious: [N, H, W, K]
    :param pos_th: anchors with iou > pos_th are positive
    :param neg_th: non-positive anchors with iou <= neg_th are negative
    :param neg_ratio: with HNM, negatives kept per positive
    :param background_label: label id of the background class
    :param alpha: weights for [positive, negative, bbox] losses
    :param HNM: enable hard negative mining
    :param batch_size: normalizer; read from locations' first dim when None
    :return: pos_loss, neg_loss, bbox_loss
    """
    positive_mask = encode_ious > pos_th
    # need to redefine the labels, those assgined to some class with iou < neg_th, should be assgined to background
    negative_mask = tf.logical_and(encode_ious <= neg_th,
                                   tf.logical_not(positive_mask))
    # background_label for negative and label for positive
    negative_labels = tf.where(
        negative_mask, background_label * tf.cast(negative_mask, tf.int32),
        encode_labels)
    # tf.add_to_collection('debug', negative_labels)
    if batch_size is None:
        batch_size = tensor_shape(locations)[0]

    if HNM:
        positive_num = tf.reduce_sum(tf.cast(positive_mask, tf.int32))
        # calculate background scores
        neg_scores = tf.nn.softmax(scores, axis=-1)[..., background_label]
        neg_scores = tf.where(
            negative_mask,
            neg_scores,
            # set positive ones's negative score to be 1, so that it won't be count in top_k
            1.0 - tf.cast(negative_mask, tf.float32))
        # solve #negative, add one so that neg_values has more than one value
        max_negative_num = tf.reduce_sum(tf.cast(negative_mask, tf.int32))
        negative_num = neg_ratio * positive_num + batch_size
        negative_num = tf.minimum(negative_num, max_negative_num)
        # Hard Negative Mining:
        # find those with lower background scores, but are indeed background!
        # top_k runs on the negated scores, so neg_values[-1] is the negated
        # k-th smallest background score.
        neg_values, _ = tf.nn.top_k(tf.reshape(-1.0 * neg_scores, [-1]),
                                    k=negative_num)
        # NOTE(review): the strict `<` drops negatives whose score equals the
        # k-th smallest one, so fewer than negative_num may be kept - confirm.
        negative_mask = tf.logical_and(negative_mask,
                                       neg_scores < -neg_values[-1])

    positive_mask = tf.cast(positive_mask, tf.float32)
    negative_mask = tf.cast(negative_mask, tf.float32)
    # Bookkeeping: anchors that are both positive and negative (the masks are
    # built to be disjoint), number of positives, number of negatives.
    tf.add_to_collection(ZERO_NUMBER_PER_LAYER_SCOPE,
                         tf.reduce_sum(positive_mask * negative_mask))
    tf.add_to_collection(POS_NUMBER_PER_LAYER_SCOPE,
                         tf.reduce_sum(positive_mask))
    tf.add_to_collection(NEG_NUMBER_PER_LAYER_SCOPE,
                         tf.reduce_sum(negative_mask))

    with tf.name_scope('cross_entropy_loss'):
        with tf.name_scope('positive'):
            pos_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=scores, labels=encode_labels)
            pos_loss = tf.div(tf.reduce_sum(pos_loss * positive_mask),
                              batch_size)
            pos_loss *= alpha[0]
            tf.add_to_collection(tf.GraphKeys.LOSSES, pos_loss)
        with tf.name_scope('negative'):
            # negatives are scored against the relabelled (background) labels
            neg_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=scores, labels=negative_labels)
            neg_loss = tf.div(tf.reduce_sum(neg_loss * negative_mask),
                              batch_size)
            neg_loss *= alpha[1]
            tf.add_to_collection(tf.GraphKeys.LOSSES, neg_loss)

    with tf.name_scope('bbox_regression_loss'):
        bbox_loss = smooth_l1(locations - encode_locations)
        bbox_loss = tf.reduce_sum(bbox_loss, axis=-1)
        bbox_loss = tf.div(tf.reduce_sum(bbox_loss * positive_mask),
                           batch_size)
        bbox_loss *= alpha[2]
        tf.add_to_collection(tf.GraphKeys.LOSSES, bbox_loss)

    return pos_loss, neg_loss, bbox_loss
def deform_conv2d(inputs,
                  num_outputs,
                  kernel_size,
                  stride=1,
                  rate=1,
                  padding='SAME',
                  activation_fn=tf.nn.relu,
                  deformable_group=1,
                  num_groups=1,
                  normalizer_fn=None,
                  weights_initializer=None,
                  weights_regularizer=None,
                  biases_initializer=tf.zeros_initializer,
                  biases_regularizer=None,
                  outputs_collections=None,
                  offsets_collections='offsets',
                  scope=None):
    """
    Deformable 2D convolution layer (slim style arguments).

    A regular conv predicts kernel_h * kernel_w * 2 * deformable_group offset
    channels from the input; these offsets and the 'weights' variable are fed
    to the custom op ``deform_conv_op``.
    :param inputs: [N, H, W, C]
    :param num_outputs: output channels, must be divisible by num_groups
    :param kernel_size: int or [kh, kw]
    :param stride: int or [sh, sw]
    :param rate: int or [rh, rw]
    :param padding: padding mode
    :param activation_fn: activation applied last, None to skip
    :param deformable_group: must divide the input channels
    :param num_groups: must divide input and output channels
    :param normalizer_fn: applied to the conv output; when None a bias is
        added instead (unless biases_initializer is None)
    :param weights_initializer: initializer for the filter and offset conv
    :param weights_regularizer: regularizer for the filter and offset conv
    :param biases_initializer: bias initializer, None disables the bias
    :param biases_regularizer: bias regularizer
    :param outputs_collections: collection the result is added to
    :param offsets_collections: outputs collection of the offset conv
    :param scope: variable scope name (default scope: 'deform_conv2d')
    :return: convolution after normalization / bias and activation
    """

    def _as_pair(value):
        # an int means the same size on both axes; lists are needed below
        # because the sizes are concatenated with other lists
        return [value, value] if isinstance(value, int) else list(value)

    # The message is passed as a string: `assert cond, print(msg)` raised an
    # AssertionError whose message was None.
    assert num_outputs % num_groups == 0, 'outc % num_groups != 0'
    kernel_size = _as_pair(kernel_size)
    stride = _as_pair(stride)
    rate = _as_pair(rate)

    with tf.variable_scope(scope, 'deform_conv2d'):
        _, iH, iW, indim = tensor_shape(inputs)
        assert indim % num_groups == 0, 'indim % num_groups != 0'
        assert indim % deformable_group == 0, 'indim % deformable_group != 0'

        offsets = conv2d(
            inputs,
            num_outputs=kernel_size[0] * kernel_size[1] * 2 * deformable_group,
            kernel_size=kernel_size,
            stride=stride,
            rate=rate,
            padding=padding,
            normalizer_fn=None,
            activation_fn=None,
            # may be using zero initializer?
            weights_initializer=weights_initializer,
            weights_regularizer=weights_regularizer,
            biases_initializer=tf.zeros_initializer,
            biases_regularizer=None,
            outputs_collections=offsets_collections,
            scope='conv_offsets')
        # NHWC -> NCHW
        offsets = tf.transpose(offsets, [0, 3, 1, 2])
        # TODO: MAYA
        # NOTE(review): this zeroes every predicted offset - confirm whether
        # it is a debugging leftover.
        offsets *= 0.0

        filters = tf.get_variable(
            name='weights',
            shape=kernel_size + [indim // num_groups, num_outputs],
            initializer=weights_initializer,
            regularizer=weights_regularizer)
        # transpose filters to required order
        # [outC, inC, ksize, ksize]
        filters = tf.transpose(filters, [3, 2, 0, 1])
        inputs = tf.transpose(inputs, [0, 3, 1, 2])

        conv = deform_conv_op.deform_conv_op(x=inputs,
                                             filter=filters,
                                             offset=offsets,
                                             strides=[1, 1] + stride,
                                             rates=[1, 1] + rate,
                                             num_groups=num_groups,
                                             padding=padding,
                                             deformable_group=deformable_group,
                                             name=scope)
        # NCHW -> NHWC
        conv = tf.transpose(conv, [0, 2, 3, 1])

        if normalizer_fn is not None:
            conv = normalizer_fn(conv)
        elif biases_initializer is not None:
            biases = tf.get_variable(name='biases',
                                     shape=[num_outputs],
                                     initializer=biases_initializer,
                                     regularizer=biases_regularizer,
                                     collections=BIAS_COLLECTIONS)
            conv = conv + biases

        if activation_fn is not None:
            conv = activation_fn(conv)

        tf.add_to_collection(outputs_collections, conv)
        return conv
def deform_conv2d(inputs,
                  outc,
                  ksize,
                  strides=[1, 1],
                  ratios=[1, 1],
                  name=None,
                  padding='SAME',
                  activate=tf.nn.relu,
                  deformable_group=1,
                  num_groups=1,
                  batch_norm=True,
                  group_norm=False,
                  use_bias=None,
                  weight_init=None,
                  weight_reg=None,
                  bias_init=tf.zeros_initializer,
                  bias_reg=None,
                  offset_init=tf.zeros_initializer,
                  offset_reg=None,
                  outputs_collections=None,
                  offsets_collections='offsets'):
    """
    Wrapper for deformable Conv layers

    A regular conv predicts hk * wk * 2 * deformable_group offset channels
    from the input; these offsets and the 'weights' variable are fed to the
    custom op ``deform_conv_op``.
    :param inputs: [N, H, W, C]
    :param outc: output channels, must be divisible by num_groups
    :param ksize: [hk, wk]
    :param strides: [hs, ws]
    :param ratios: [hr, wr]
    :param name: var_scope & operation name
    :param padding: padding mode
    :param activate: activate function
    :param deformable_group: must divide the input channels
    :param num_groups: must divide input and output channels
    :param batch_norm: whether performs batch norm
    :param group_norm: whether performs group norm (only if not batch_norm)
    :param use_bias: whether use bias addition, defaults to `not batch_norm`
    :param weight_init: weight initializer (filter and offset conv)
    :param weight_reg: weight regularizer (filter and offset conv)
    :param bias_init: bias initializer
    :param bias_reg: bias regularizer
    :param offset_init: not used by this function
    :param offset_reg: not used by this function
    :param outputs_collections: add result to some collection
    :param offsets_collections: outputs collection of the offset conv
    :return: convolution after activation
    """
    # can't use both
    if use_bias is None:
        use_bias = not batch_norm
    assert not (batch_norm and use_bias)
    # The message is passed as a string: `assert cond, print(msg)` raised an
    # AssertionError whose message was None.
    assert outc % num_groups == 0, 'outc % num_groups != 0'

    # the sizes are concatenated with lists below, so tuples are converted
    ksize = list(ksize)
    strides = list(strides)
    ratios = list(ratios)

    with tf.variable_scope(name, 'deform_conv2d'):
        _, iH, iW, indim = tensor_shape(inputs)
        assert indim % num_groups == 0, 'indim % num_groups != 0'
        assert indim % deformable_group == 0, 'indim % deformable_group != 0'

        # use num groups
        filters = get_variable(name='weights',
                               shape=ksize + [indim // num_groups, outc],
                               init=weight_init,
                               reg=weight_reg,
                               collections=WEIGHT_COLLECTIONS)

        # use get_variable merely for debug!
        offsets = conv2d(
            inputs,
            outc=ksize[0] * ksize[1] * 2 * deformable_group,
            ksize=ksize,
            strides=strides,
            ratios=ratios,
            padding=padding,
            batch_norm=False,
            group_norm=False,
            use_bias=True,
            activate=None,
            name='conv_offsets',
            # may be using zero initializer?
            weight_init=weight_init,
            weight_reg=weight_reg,
            bias_init=tf.zeros_initializer,
            bias_reg=None,
            outputs_collections=offsets_collections)
        # NHWC -> NCHW
        offsets = tf.transpose(offsets, [0, 3, 1, 2])
        tf.add_to_collection('offsets', offsets)

        # transpose filters to required order
        # [outC, inC, ksize, ksize]
        filters = tf.transpose(filters, [3, 2, 0, 1])
        inputs = tf.transpose(inputs, [0, 3, 1, 2])

        conv = deform_conv_op.deform_conv_op(x=inputs,
                                             filter=filters,
                                             offset=offsets,
                                             strides=[1, 1] + strides,
                                             rates=[1, 1] + ratios,
                                             num_groups=num_groups,
                                             padding=padding,
                                             deformable_group=deformable_group,
                                             name=name)
        # NCHW -> NHWC
        conv = tf.transpose(conv, [0, 2, 3, 1])

        # exactly one of: batch norm, group norm, bias
        if batch_norm:
            conv = batch_norm2d(conv)
        elif group_norm:
            conv = GroupNorm2D(conv)
        elif use_bias:
            biases = get_variable(name='biases',
                                  shape=[outc],
                                  init=bias_init,
                                  reg=bias_reg,
                                  collections=BIAS_COLLECTIONS)
            conv = conv + biases

        if activate is not None:
            conv = activate(conv)

        tf.add_to_collection(outputs_collections, conv)
        return conv
def _extract_features(images,
                      model_options,
                      weight_decay=0.0001,
                      reuse=tf.AUTO_REUSE,
                      is_training=False,
                      fine_tune_batch_norm=False):
    """Extracts features by the particular model_variant.

    When model_options.aspp_with_batch_norm is false the backbone features
    are returned as they are. Otherwise the ASPP branches (optional image
    level pooling, a 1x1 conv and one 3x3 atrous conv per rate) are
    concatenated, projected to 256 channels and passed through dropout.
    Several intermediate tensors are stored on DEBUG_VARS along the way.

    Args:
      images: A tensor of size [batch, height, width, channels].
      model_options: A ModelOptions instance to configure models.
      weight_decay: The weight decay for model variables.
      reuse: Reuse the model variables or not.
      is_training: Is training or not.
      fine_tune_batch_norm: Fine-tune the batch norm parameters or not.

    Returns:
      concat_logits: A tensor of size [batch, feature_height, feature_width,
        feature_channels], where feature_height/feature_width are determined by
        the images height/width and output_stride.
      end_points: A dictionary from components of the network to the
        corresponding activation.
    """
    # feature extractor is a backbone factory
    DEBUG_VARS.raw_image = images
    features, end_points = feature_extractor.extract_features(
        images,
        output_stride=model_options.output_stride,
        multi_grid=model_options.multi_grid,
        model_variant=model_options.model_variant,
        weight_decay=weight_decay,
        reuse=reuse,
        is_training=is_training,
        fine_tune_batch_norm=fine_tune_batch_norm)
    # TODO:check
    # DEBUG_VARS.xception_feature = end_points['xception_65/entry_flow/conv1_1/Relu:0']
    DEBUG_VARS.xception_feature = features

    if not model_options.aspp_with_batch_norm:
        return features, end_points
    else:
        # Batch norm statistics are only updated when training AND fine
        # tuning batch norm.
        batch_norm_params = {
            'is_training': is_training and fine_tune_batch_norm,
            'decay': 0.9997,
            'eps': 1e-5,
            'affine': True,
        }
        regularize_func = regularizer('l2', weight_decay)

        with tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
            with arg_scope([sep_conv2d],
                           activate=tf.nn.relu,
                           activate_middle=tf.nn.relu,
                           batch_norm=True,
                           depthwise_weight_reg=None,
                           pointwise_weight_reg=regularize_func,
                           padding='SAME',
                           strides=[1, 1]):
                with arg_scope([conv2d],
                               activate=tf.nn.relu,
                               weight_reg=regularize_func,
                               batch_norm=True,
                               padding='SAME',
                               strides=[1, 1]):
                    # TODO: ASPP IS IMPLEMENTED HERE! Check Out!
                    with arg_scope([batch_norm2d], **batch_norm_params):
                        # every ASPP branch outputs `depth` channels
                        depth = 256
                        branch_logits = []

                        # TODO: ADD IMAGE POOLING HERE
                        if model_options.add_image_level_feature:
                            # this crop size has been updated to the new scaled one outside, which is the exact size
                            # of this model's inputs
                            _, image_h, image_w, _ = tensor_shape(images)
                            pool_height = scale_dimension(
                                image_h, 1. / model_options.output_stride)
                            pool_width = scale_dimension(
                                image_w, 1. / model_options.output_stride)
                            # global average pooling, check whether the shape here is 1?
                            image_feature = avg_pool2d(
                                features, [pool_height, pool_width],
                                [pool_height, pool_width],
                                padding='VALID')
                            # collapse channels to depth after GAP
                            image_feature = conv2d(inputs=image_feature,
                                                   outc=depth,
                                                   ksize=[1, 1],
                                                   name=_IMAGE_POOLING_SCOPE)
                            # TODO:check
                            DEBUG_VARS.image_feature = image_feature
                            # reshape it to final feature map shape
                            image_feature = tf.image.resize_bilinear(
                                image_feature, [pool_height, pool_width],
                                align_corners=True)
                            image_feature.set_shape(
                                [None, pool_height, pool_width, depth])
                            # add image level feature to branch_logits
                            branch_logits.append(image_feature)

                        # Employ a 1x1 convolution.
                        branch_logits.append(
                            conv2d(features,
                                   outc=depth,
                                   ksize=[1, 1],
                                   name=_ASPP_SCOPE + str(0)))

                        if model_options.atrous_rates:
                            # Employ 3x3 convolutions with different atrous rates.
                            DEBUG_VARS.aspp_features = []
                            # scopes are numbered from 1; index 0 is the 1x1
                            # branch above
                            for i, rate in enumerate(
                                    model_options.atrous_rates, 1):
                                scope = _ASPP_SCOPE + str(i)
                                if model_options.aspp_with_separable_conv:
                                    aspp_features = sep_conv2d(
                                        features,
                                        outc=depth,
                                        ksize=[3, 3],
                                        ratios=[rate, rate],
                                        name=scope)
                                    # only the separable variant is recorded
                                    # on DEBUG_VARS
                                    DEBUG_VARS.aspp_features.append(
                                        aspp_features)
                                else:
                                    aspp_features = conv2d(features,
                                                           outc=depth,
                                                           ksize=[3, 3],
                                                           ratios=[rate, rate],
                                                           name=scope)
                                branch_logits.append(aspp_features)

                        # Merge branch logits.
                        concat_logits = tf.concat(branch_logits, 3)
                        concat_logits = conv2d(inputs=concat_logits,
                                               outc=depth,
                                               ksize=[1, 1],
                                               name=_CONCAT_PROJECTION_SCOPE)
                        DEBUG_VARS.aspp_concat_feature = concat_logits
                        concat_logits = drop_out(
                            concat_logits,
                            kp_prob=0.9,
                            is_training=is_training,
                            name=_CONCAT_PROJECTION_SCOPE + '_dropout')

                        return concat_logits, end_points