def sample_rpn_outputs(boxes, scores, is_training=False, only_positive=False):
    """Sample boxes according to scores and some learning strategies,
    assuming the first class is background.

    Params:
    boxes:  of shape (..., Ax4), each entry is [x1, y1, x2, y2], the last axis has k*4 dims
    scores: of shape (..., A), probs of fg, in [0, 1]
    """
    min_size = cfg.FLAGS.min_size
    rpn_nms_threshold = cfg.FLAGS.rpn_nms_threshold
    pre_nms_top_n = cfg.FLAGS.pre_nms_top_n
    post_nms_top_n = cfg.FLAGS.post_nms_top_n
    if not is_training:
        pre_nms_top_n = int(pre_nms_top_n / 2)
        post_nms_top_n = int(post_nms_top_n / 2)

    boxes = boxes.reshape((-1, 4))
    scores = scores.reshape((-1, 1))

    # filter backgrounds
    # Hopefully this filters out most background anchors, since an argsort is too slow
    if only_positive:
        keeps = np.where(scores > 0.5)[0]
        boxes = boxes[keeps, :]
        scores = scores[keeps]

    # filter boxes below the minimum size
    keeps = _filter_boxes(boxes, min_size=min_size)
    boxes = boxes[keeps, :]
    scores = scores[keeps]

    # keep the top-scoring boxes before NMS (descending sort)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_top_n > 0:
        order = order[:pre_nms_top_n]
    boxes = boxes[order, :]
    scores = scores[order]

    # filter with nms
    det = np.hstack((boxes, scores)).astype(np.float32)
    keeps = nms_wrapper.nms(det, rpn_nms_threshold)
    if post_nms_top_n > 0:
        keeps = keeps[:post_nms_top_n]
    boxes = boxes[keeps, :]
    scores = scores[keeps]
    batch_inds = np.zeros([boxes.shape[0]], dtype=np.int32)

    if _DEBUG:
        LOG('SAMPLE: %d rois have been chosen' % len(keeps))
        LOG('SAMPLE: a positive box: %d %d %d %d %.4f' %
            (boxes[0, 0], boxes[0, 1], boxes[0, 2], boxes[0, 3], scores[0]))
        hs = boxes[:, 3] - boxes[:, 1]
        ws = boxes[:, 2] - boxes[:, 0]
        assert min(np.min(hs), np.min(ws)) > 0, 'invalid boxes'

    return boxes, scores, batch_inds
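# `_filter_boxes` is used above but not defined in this snippet. Below is a minimal
# sketch of its presumed behavior (return indices of boxes whose width and height
# both reach min_size); the upstream version may additionally scale min_size by the
# image scale.
import numpy as np

def _filter_boxes_sketch(boxes, min_size):
    """Return indices of boxes at least min_size wide and tall. (assumed behavior)"""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    return np.where((ws >= min_size) & (hs >= min_size))[0]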
def encode(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width):
    """Encode mask ground truth into learnable targets and sample some examples.

    Params
    ------
    gt_masks: {0, 1} matrices of image_height x image_width, of shape (G, imh, imw)
    gt_boxes: ground-truth boxes of shape (G, 5), each row is [x1, y1, x2, y2, class]
    rois:     the bounding boxes, of shape (N, 4)
    ## scores: scores of shape (N, 1)
    num_classes: K
    mask_height, mask_width: height and width of output masks

    Returns
    -------
    # rois: boxes sampled for cropping masks, of shape (M, 4)
    labels: class ids of shape (M, 1)
    mask_targets: learning targets of shape (M, pooled_height, pooled_width, K) with {0, 1} values
    mask_inside_weights: of shape (M, pooled_height, pooled_width, K) in {0, 1}, indicating which masks are sampled
    """
    total_masks = rois.shape[0]
    # N x G overlap matrix
    overlaps = cython_bbox.bbox_overlaps(
        np.ascontiguousarray(rois[:, 0:4], dtype=np.float),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
    gt_assignment = overlaps.argmax(axis=1)  # shape is N
    max_overlaps = overlaps[np.arange(len(gt_assignment)), gt_assignment]  # N
    labels = gt_boxes[gt_assignment, 4]  # N

    # sample positive rois whose intersection is more than 0.5
    keep_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
    num_masks = int(min(keep_inds.size, cfg.FLAGS.masks_per_image))
    if keep_inds.size > 0:
        keep_inds = np.random.choice(keep_inds, size=num_masks, replace=False)
    LOG('Masks: %d of %d rois are considered positive mask. Number of masks %d'
        % (num_masks, rois.shape[0], gt_masks.shape[0]))

    # rois = rois[inds]
    # labels = labels[inds].astype(np.int32)
    # gt_assignment = gt_assignment[inds]

    mask_targets = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.int32)
    mask_inside_weights = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.float32)

    # TODO: speed bottleneck?
    for i in keep_inds:
        roi = rois[i, :4]
        cropped = gt_masks[gt_assignment[i],
                           int(roi[1]):int(roi[3]) + 1,
                           int(roi[0]):int(roi[2]) + 1]
        cropped = cv2.resize(cropped, (mask_width, mask_height),
                             interpolation=cv2.INTER_NEAREST)
        mask_targets[i, :, :, int(labels[i])] = cropped
        mask_inside_weights[i, :, :, int(labels[i])] = 1

    return labels, mask_targets, mask_inside_weights
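# A toy invocation of the mask encoder above, useful for sanity-checking output
# shapes. This is only a sketch: it assumes the project's cfg.FLAGS (mask_threshold,
# masks_per_image) are configured and that cython_bbox/cv2 are importable.
import numpy as np

def _demo_mask_encode():
    G, imh, imw, K = 2, 64, 64, 81
    gt_masks = np.random.randint(0, 2, (G, imh, imw)).astype(np.uint8)
    gt_boxes = np.array([[4, 4, 30, 30, 15],
                         [10, 12, 50, 40, 3]], dtype=np.float32)
    rois = gt_boxes[:, :4].copy()  # perfectly overlapping rois
    labels, mask_targets, mask_inside_weights = encode(
        gt_masks, gt_boxes, rois, K, mask_height=28, mask_width=28)
    print(labels.shape, mask_targets.shape, mask_inside_weights.shape)
    # expected: (2,) (2, 28, 28, 81) (2, 28, 28, 81)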
def _get_coco_masks(coco, img_id, height, width, img_name):
    """Get the masks for all the instances in an image.
    Note: some images are not annotated.

    Return:
    masks:   m x h x w numpy array
    classes: m x 1
    bboxes:  m x 4
    """
    annIds = coco.getAnnIds(imgIds=[img_id], iscrowd=None)
    # assert annIds is not None and annIds > 0, 'No annotation for %s' % str(img_id)
    anns = coco.loadAnns(annIds)
    # assert len(anns) > 0, 'No annotation for %s' % str(img_id)
    masks = []
    classes = []
    bboxes = []
    mask = np.zeros((height, width), dtype=np.float32)
    segmentations = []
    for ann in anns:
        m = coco.annToMask(ann)  # zero-one mask
        assert m.shape[0] == height and m.shape[1] == width, \
            'image %s and ann %s do not match' % (img_id, ann)
        masks.append(m)
        cat_id = _cat_id_to_real_id(ann['category_id'])
        classes.append(cat_id)
        bboxes.append(ann['bbox'])
        m = m.astype(np.float32) * cat_id
        mask[m > 0] = m[m > 0]

    masks = np.asarray(masks)
    classes = np.asarray(classes)
    bboxes = np.asarray(bboxes)
    # convert COCO [x, y, w, h] to [x1, y1, x2, y2]
    non_annotation_tag = False
    if bboxes.shape[0] <= 0:
        bboxes = np.zeros([0, 4], dtype=np.float32)
        classes = np.zeros([0], dtype=np.float32)
        sys.stdout.write('\nNo annotations for %s, passed\n' % img_name)
        sys.stdout.flush()
        LOG('No annotations for %s' % img_name)
        non_annotation_tag = True
    bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2]
    bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3]

    gt_boxes = np.hstack((bboxes, classes[:, np.newaxis]))
    gt_boxes = gt_boxes.astype(np.float32)
    masks = masks.astype(np.uint8)
    mask = mask.astype(np.uint8)
    assert masks.shape[0] == gt_boxes.shape[0], 'Shape Error'

    return gt_boxes, masks, mask, non_annotation_tag
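# `_cat_id_to_real_id` is referenced above but not defined in this snippet. COCO
# category ids are sparse (1..90 with gaps), so they are usually remapped to
# contiguous ids in [1, 80], with 0 reserved for background. A minimal sketch,
# assuming the sorted list of valid ids (e.g. sorted(coco.getCatIds())) is passed in:
def _cat_id_to_real_id_sketch(cat_id, coco_cat_ids):
    """Map a sparse COCO category id to a contiguous id in [1, len(coco_cat_ids)]."""
    return coco_cat_ids.index(cat_id) + 1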
def sample_rpn_outputs(boxes, scores, is_training=False, only_positive=False):
    """Sample boxes according to scores and some learning strategies,
    assuming the first class is background.

    Params:
    boxes:  of shape (..., Ax4), each entry is [x1, y1, x2, y2], the last axis has k*4 dims
    scores: of shape (..., A), foreground prob
    """
    min_size = cfg.FLAGS.min_size
    rpn_nms_threshold = cfg.FLAGS.rpn_nms_threshold
    pre_nms_top_n = cfg.FLAGS.pre_nms_top_n
    post_nms_top_n = cfg.FLAGS.post_nms_top_n
    if not is_training:
        pre_nms_top_n = int(pre_nms_top_n / 2)
        post_nms_top_n = int(post_nms_top_n / 2)

    boxes = boxes.reshape((-1, 4))
    scores = scores.reshape((-1, 1))

    # filter backgrounds
    # Hopefully this filters out most background anchors, since an argsort is too slow
    if only_positive:
        keeps = np.where(scores > 0.5)[0]
        boxes = boxes[keeps, :]
        scores = scores[keeps]

    # filter boxes below the minimum size
    keeps = _filter_boxes(boxes, min_size=min_size)
    boxes = boxes[keeps, :]
    scores = scores[keeps]

    # keep the top-scoring boxes before NMS; note the [::-1] so the sort is
    # descending, otherwise the lowest-scoring proposals would be kept
    order = scores.ravel().argsort()[::-1]
    if pre_nms_top_n > 0:
        order = order[:pre_nms_top_n]
    boxes = boxes[order, :]
    scores = scores[order]

    # filter with nms
    det = np.hstack((boxes, scores)).astype(np.float32)
    keeps = nms_wrapper.nms(det, rpn_nms_threshold)
    if post_nms_top_n > 0:
        keeps = keeps[:post_nms_top_n]
    boxes = boxes[keeps, :]
    scores = scores[keeps]

    LOG('%d rois have been chosen' % len(keeps))

    return boxes, scores
def _get_coco_masks(self, coco, img_id, height, width, img_name):
    """Get the masks for all the instances in an image.
    Note: some images are not annotated.

    Return:
    masks:   m x h x w numpy array
    classes: m x 1
    bboxes:  m x 4
    """
    annIds = coco.getAnnIds(imgIds=[img_id], iscrowd=None)
    # assert annIds is not None and annIds > 0, 'No annotation for %s' % str(img_id)
    anns = coco.loadAnns(annIds)
    # assert len(anns) > 0, 'No annotation for %s' % str(img_id)
    masks = []
    classes = []
    bboxes = []
    mask = np.zeros((height, width), dtype=np.float32)
    segmentations = []
    for ann in anns:
        m = coco.annToMask(ann)  # zero-one mask
        # m = np.zeros([height, width], dtype=np.int32)
        assert m.shape[0] == height and m.shape[1] == width, \
            'image %s and ann %s do not match' % (img_id, ann)
        masks.append(m)
        cat_id = self._cat_id_to_real_id(ann['category_id'])
        if ann['iscrowd']:
            cat_id = -1
        classes.append(cat_id)
        bboxes.append(ann['bbox'])
        m = m.astype(np.float32) * cat_id
        mask[m > 0] = m[m > 0]

    masks = np.asarray(masks)
    classes = np.asarray(classes)
    bboxes = np.asarray(bboxes)
    # convert COCO [x, y, w, h] to [x1, y1, x2, y2]
    if bboxes.shape[0] <= 0:
        bboxes = np.zeros([0, 4], dtype=np.float32)
        classes = np.zeros([0], dtype=np.float32)
        masks = np.zeros([1, height, width], dtype=np.int32)  # force to 1
        # print('No annotations for %s' % img_name)
        LOG('No annotations for %s' % img_name, onscreen=False)
    bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2]
    bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3]

    assert classes.shape[0] == bboxes.shape[0], 'Processing Annotation Error'
    return (bboxes.astype(np.float32), classes.astype(np.float32),
            masks.astype(np.int32), mask.astype(np.int32))
def sample_rpn_outputs(boxes, scores, is_training=False, only_positive=False):
    """Sample boxes according to scores and some learning strategies,
    assuming the first class is background.

    Params:
    boxes:  of shape (..., Ax4), each entry is [x1, y1, x2, y2], the last axis has k*4 dims
    scores: of shape (..., A), probs of fg, in [0, 1]
            # but the boxes are already reshaped to [-1, 4] and scores to [-1, 1]
    """
    min_size = cfg.FLAGS.min_size
    rpn_nms_threshold = cfg.FLAGS.rpn_nms_threshold
    pre_nms_top_n = cfg.FLAGS.pre_nms_top_n
    post_nms_top_n = cfg.FLAGS.post_nms_top_n
    # training: 12000, 2000
    # testing:   6000,  400
    if not is_training:
        pre_nms_top_n = int(pre_nms_top_n / 2)
        post_nms_top_n = int(post_nms_top_n / 5)

    boxes = boxes.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    assert scores.shape[0] == boxes.shape[0], 'scores and boxes do not match'

    # filter backgrounds
    # Hopefully this filters out most background anchors, since an argsort is too slow
    if only_positive:
        keeps = np.where(scores > 0.5)[0]
        boxes = boxes[keeps, :]
        scores = scores[keeps]

    # filter boxes below the minimum size
    keeps = _filter_boxes(boxes, min_size=min_size)
    boxes = boxes[keeps, :]
    scores = scores[keeps]

    # keep the top-scoring boxes before NMS (descending sort)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_top_n > 0:
        order = order[:pre_nms_top_n]
    boxes = boxes[order, :]
    scores = scores[order]

    # filter with nms
    det = np.hstack((boxes, scores)).astype(np.float32)
    keeps = nms_wrapper.nms(det, rpn_nms_threshold)
    if post_nms_top_n > 0:
        keeps = keeps[:post_nms_top_n]
    boxes = boxes[keeps, :]
    scores = scores[keeps]
    batch_inds = np.zeros([boxes.shape[0]], dtype=np.int32)

    # # randomly sample boxes  ## try early sampling later
    # fg_inds = np.where(scores > 0.5)[0]
    # num_fgs = min(fg_inds.size, int(rois_per_image * fg_roi_fraction))

    if _DEBUG:
        LOG('SAMPLE: %d rois have been chosen' % len(scores))
        LOG('SAMPLE: a positive box: %d %d %d %d %.4f' %
            (boxes[0, 0], boxes[0, 1], boxes[0, 2], boxes[0, 3], scores[0]))
        LOG('SAMPLE: a negative box: %d %d %d %d %.4f' %
            (boxes[-1, 0], boxes[-1, 1], boxes[-1, 2], boxes[-1, 3], scores[-1]))
        hs = boxes[:, 3] - boxes[:, 1]
        ws = boxes[:, 2] - boxes[:, 0]
        assert min(np.min(hs), np.min(ws)) > 0, 'invalid boxes'

    return boxes, scores.astype(np.float32), batch_inds
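# `nms_wrapper.nms` above is a compiled (Cython/GPU) routine. The pure-NumPy
# sketch below shows the greedy suppression it is expected to perform: keep the
# highest-scoring box, drop boxes whose IoU with it exceeds `thresh`, repeat.
import numpy as np

def nms_sketch(dets, thresh):
    """dets: (N, 5) array of [x1, y1, x2, y2, score]; returns indices to keep."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the top box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep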
def encode(gt_boxes, rois, num_classes):
    """Match and encode ground-truth boxes (gt_boxes) into learning targets for rois.

    Sampling

    Parameters
    ---------
    gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]
    rois:     an array of shape (R x 4), [x1, y1, x2, y2]
    num_classes: scalar, number of classes

    Returns
    --------
    labels: Nx1 array in [0, num_classes)
    bbox_targets: of shape (N, Kx4), regression targets
    bbox_inside_weights: of shape (N, Kx4), in {0, 1}, indicating which class is assigned
    """
    all_rois = rois
    num_rois = rois.shape[0]
    if gt_boxes.size > 0:
        # R x G matrix
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(all_rois[:, 0:4], dtype=np.float),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
        gt_assignment = overlaps.argmax(axis=1)  # R
        # max_overlaps = overlaps.max(axis=1)  # R
        max_overlaps = overlaps[np.arange(rois.shape[0]), gt_assignment]
        # note: this would assign every roi a positive label
        # labels = gt_boxes[gt_assignment, 4]
        labels = np.zeros([num_rois], dtype=np.float32)
        labels[:] = -1

        # if _DEBUG:
        #     print('gt_assignment')
        #     print(gt_assignment)

        # sample fg and bg rois at a ratio of about 1:3
        fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]
        fg_rois = int(min(fg_inds.size,
                          cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction))
        if fg_inds.size > 0 and fg_rois < fg_inds.size:
            fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False)
        labels[fg_inds] = gt_boxes[gt_assignment[fg_inds], 4]

        # TODO: sampling strategy
        bg_inds = np.where((max_overlaps < cfg.FLAGS.bg_threshold))[0]
        bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 64)
        if bg_inds.size > 0 and bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)
        labels[bg_inds] = 0

        # ignore rois with overlaps between fg_threshold and bg_threshold
        ignore_inds = np.where(((max_overlaps > cfg.FLAGS.bg_threshold) &
                                (max_overlaps < cfg.FLAGS.fg_threshold)))[0]
        labels[ignore_inds] = -1

        keep_inds = np.append(fg_inds, bg_inds)
        if _DEBUG:
            print('keep_inds')
            print(keep_inds)
            print('fg_inds')
            print(fg_inds)
            print('bg_inds')
            print(bg_inds)
            print('bg_rois:', bg_rois)
            print('cfg.FLAGS.bg_threshold:', cfg.FLAGS.bg_threshold)
            # print(max_overlaps)

        LOG('ROIEncoder: %d positive rois, %d negative rois' %
            (len(fg_inds), len(bg_inds)))

        bbox_targets, bbox_inside_weights = _compute_targets(
            rois[keep_inds, 0:4],
            gt_boxes[gt_assignment[keep_inds], :4],
            labels[keep_inds],
            num_classes)
        bbox_targets = _unmap(bbox_targets, num_rois, keep_inds, 0)
        bbox_inside_weights = _unmap(bbox_inside_weights, num_rois, keep_inds, 0)
    else:
        # there is no gt
        labels = np.zeros((num_rois, ), np.float32)
        bbox_targets = np.zeros((num_rois, 4 * num_classes), np.float32)
        bbox_inside_weights = np.zeros((num_rois, 4 * num_classes), np.float32)
        bg_rois = min(int(cfg.FLAGS.rois_per_image * (1 - cfg.FLAGS.fg_roi_fraction)), 64)
        if bg_rois < num_rois:
            bg_inds = np.arange(num_rois)
            ignore_inds = np.random.choice(bg_inds, size=num_rois - bg_rois, replace=False)
            labels[ignore_inds] = -1

    return labels, bbox_targets, bbox_inside_weights
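# `_compute_targets` and `_unmap` are used above but not shown. In Fast/Faster
# R-CNN style code, _compute_targets regresses each roi toward its matched gt box
# as (dx, dy, dw, dh) deltas placed in the 4 slots of the assigned class, and
# _unmap scatters the sampled subset back onto the full roi set. A rough sketch
# (the real version may also normalize deltas with precomputed means/stds):
import numpy as np

def _compute_targets_sketch(rois, gt_rois, labels, num_classes):
    """Return (N, K*4) regression targets and inside weights."""
    ws = rois[:, 2] - rois[:, 0] + 1.0
    hs = rois[:, 3] - rois[:, 1] + 1.0
    gt_ws = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_hs = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    # center offsets normalized by roi size; log-space scale offsets
    dx = ((gt_rois[:, 0] + gt_rois[:, 2]) - (rois[:, 0] + rois[:, 2])) / 2.0 / ws
    dy = ((gt_rois[:, 1] + gt_rois[:, 3]) - (rois[:, 1] + rois[:, 3])) / 2.0 / hs
    dw = np.log(gt_ws / ws)
    dh = np.log(gt_hs / hs)
    deltas = np.vstack((dx, dy, dw, dh)).transpose()  # (N, 4)
    targets = np.zeros((rois.shape[0], 4 * num_classes), dtype=np.float32)
    weights = np.zeros_like(targets)
    for i, cls in enumerate(labels.astype(np.int32)):
        if cls > 0:  # background (0) and ignored (-1) rois get no box target
            targets[i, 4 * cls:4 * cls + 4] = deltas[i]
            weights[i, 4 * cls:4 * cls + 4] = 1.0
    return targets, weights

def _unmap_sketch(data, count, inds, fill=0):
    """Scatter `data` rows back into an array of `count` rows, `fill` elsewhere."""
    ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
    ret.fill(fill)
    ret[inds] = data
    return ret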
def train():
    """The main function that runs training."""

    ## data
    image, original_image_height, original_image_width, image_height, image_width, \
        gt_boxes, gt_masks, num_instances, image_id = \
        datasets.get_dataset(FLAGS.dataset_name,
                             FLAGS.dataset_split_name,
                             FLAGS.dataset_dir,
                             FLAGS.im_batch,
                             is_training=True)

    ## queuing data
    data_queue = tf.RandomShuffleQueue(
        capacity=32, min_after_dequeue=16,
        dtypes=(image.dtype, original_image_height.dtype, original_image_width.dtype,
                image_height.dtype, image_width.dtype,
                gt_boxes.dtype, gt_masks.dtype,
                num_instances.dtype, image_id.dtype))
    enqueue_op = data_queue.enqueue((image, original_image_height, original_image_width,
                                     image_height, image_width,
                                     gt_boxes, gt_masks, num_instances, image_id))
    data_queue_runner = tf.train.QueueRunner(data_queue, [enqueue_op] * 4)
    tf.add_to_collection(tf.GraphKeys.QUEUE_RUNNERS, data_queue_runner)
    (image, original_image_height, original_image_width, image_height, image_width,
     gt_boxes, gt_masks, num_instances, image_id) = data_queue.dequeue()
    im_shape = tf.shape(image)
    image = tf.reshape(image, (im_shape[0], im_shape[1], im_shape[2], 3))

    ## network
    logits, end_points, pyramid_map = network.get_network(
        FLAGS.network, image,
        weight_decay=FLAGS.weight_decay,
        batch_norm_decay=FLAGS.batch_norm_decay,
        is_training=True)
    outputs = pyramid_network.build(
        end_points, image_height, image_width, pyramid_map,
        num_classes=81,
        base_anchors=3,  # 9  # 15
        is_training=True,
        gt_boxes=gt_boxes, gt_masks=gt_masks,
        loss_weights=[1.0, 1.0, 10.0, 1.0, 10.0])
        # loss_weights=[10.0, 1.0, 0.0, 0.0, 0.0])
        # loss_weights=[100.0, 100.0, 1000.0, 10.0, 100.0])
        # loss_weights=[0.2, 0.2, 1.0, 0.2, 1.0])
        # loss_weights=[0.1, 0.01, 10.0, 0.1, 1.0])

    total_loss = outputs['total_loss']
    losses = outputs['losses']
    batch_info = outputs['batch_info']
    regular_loss = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    input_image = end_points['input']

    training_rcnn_rois = outputs['training_rcnn_rois']
    training_rcnn_clses = outputs['training_rcnn_clses']
    training_rcnn_clses_target = outputs['training_rcnn_clses_target']
    training_rcnn_scores = outputs['training_rcnn_scores']
    training_mask_rois = outputs['training_mask_rois']
    training_mask_clses_target = outputs['training_mask_clses_target']
    training_mask_final_mask = outputs['training_mask_final_mask']
    training_mask_final_mask_target = outputs['training_mask_final_mask_target']

    tmp_0 = outputs['rpn']['P2']['shape']
    tmp_1 = outputs['rpn']['P3']['shape']
    tmp_2 = outputs['rpn']['P4']['shape']
    tmp_3 = outputs['rpn']['P5']['shape']

    ## solvers
    global_step = slim.create_global_step()
    update_op = solve(global_step)

    cropped_rois = tf.get_collection('__CROPPED__')[0]
    transposed = tf.get_collection('__TRANSPOSED__')[0]

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
    # gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    # sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))
    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer())
    sess.run(init_op)

    summary_op = tf.summary.merge_all()
    logdir = os.path.join(FLAGS.train_dir, strftime('%Y%m%d%H%M%S', gmtime()))
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    summary_writer = tf.summary.FileWriter(logdir, graph=sess.graph)

    ## restore
    restore(sess)

    ## coord settings
    coord = tf.train.Coordinator()
    threads = []
    for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
        threads.extend(qr.create_threads(sess, coord=coord, daemon=True, start=True))
    tf.train.start_queue_runners(sess=sess, coord=coord)

    ## saver init
    saver = tf.train.Saver(max_to_keep=20)

    ## finalize the graph to check for memory leaks
    sess.graph.finalize()

    ## main loop
    for step in range(FLAGS.max_iters):
        start_time = time.time()

        s_, tot_loss, reg_lossnp, image_id_str, \
            rpn_box_loss, rpn_cls_loss, rcnn_box_loss, rcnn_cls_loss, mask_loss, \
            gt_boxesnp, tmp_0np, tmp_1np, tmp_2np, tmp_3np, \
            rpn_batch_pos, rpn_batch, rcnn_batch_pos, rcnn_batch, mask_batch_pos, mask_batch, \
            input_imagenp, \
            training_rcnn_roisnp, training_rcnn_clsesnp, training_rcnn_clses_targetnp, \
            training_rcnn_scoresnp, training_mask_roisnp, training_mask_clses_targetnp, \
            training_mask_final_masknp, training_mask_final_mask_targetnp = \
            sess.run([update_op, total_loss, regular_loss, image_id] + losses +
                     [gt_boxes] + [tmp_0] + [tmp_1] + [tmp_2] + [tmp_3] +
                     batch_info + [input_image] +
                     [training_rcnn_rois] + [training_rcnn_clses] +
                     [training_rcnn_clses_target] + [training_rcnn_scores] +
                     [training_mask_rois] + [training_mask_clses_target] +
                     [training_mask_final_mask] + [training_mask_final_mask_target])

        duration_time = time.time() - start_time
        if step % 1 == 0:
            LOG("""iter %d: image-id:%07d, time:%.3f(sec), regular_loss: %.6f, """
                """total-loss %.4f(%.4f, %.4f, %.6f, %.4f, %.4f), """
                """instances: %d, """
                """batch:(%d|%d, %d|%d, %d|%d)""" %
                (step, image_id_str, duration_time, reg_lossnp,
                 tot_loss, rpn_box_loss, rpn_cls_loss, rcnn_box_loss, rcnn_cls_loss, mask_loss,
                 gt_boxesnp.shape[0],
                 rpn_batch_pos, rpn_batch, rcnn_batch_pos, rcnn_batch,
                 mask_batch_pos, mask_batch))
            LOG("target")
            LOG(cat_id_to_cls_name(np.unique(np.argmax(np.asarray(training_rcnn_clses_targetnp), axis=1))))
            LOG("predict")
            LOG(cat_id_to_cls_name(np.unique(np.argmax(np.array(training_rcnn_clsesnp), axis=1))))
            LOG(tmp_0np)
            LOG(tmp_1np)
            LOG(tmp_2np)
            LOG(tmp_3np)

        if step % 50 == 0:
            draw_bbox(step,
                      np.uint8((np.array(input_imagenp[0]) / 2.0 + 0.5) * 255.0),
                      name='train_est',
                      bbox=training_rcnn_roisnp,
                      label=np.argmax(np.array(training_rcnn_scoresnp), axis=1),
                      prob=training_rcnn_scoresnp,
                      # bbox=training_mask_roisnp,
                      # label=training_mask_clses_targetnp,
                      # prob=np.zeros((training_mask_final_masknp.shape[0], 81), dtype=np.float32) + 1.0,
                      # mask=training_mask_final_masknp,
                      vis_all=True)
            draw_bbox(step,
                      np.uint8((np.array(input_imagenp[0]) / 2.0 + 0.5) * 255.0),
                      name='train_gt',
                      bbox=training_rcnn_roisnp,
                      label=np.argmax(np.array(training_rcnn_clses_targetnp), axis=1),
                      prob=np.zeros((training_rcnn_clsesnp.shape[0], 81), dtype=np.float32) + 1.0,
                      # bbox=training_mask_roisnp,
                      # label=training_mask_clses_targetnp,
                      # prob=np.zeros((training_mask_final_masknp.shape[0], 81), dtype=np.float32) + 1.0,
                      # mask=training_mask_final_mask_targetnp,
                      vis_all=True)

        if np.isnan(tot_loss) or np.isinf(tot_loss):
            LOG(gt_boxesnp)
            raise ValueError('non-finite total loss at step %d' % step)

        if step % 100 == 0:
            summary_str = sess.run(summary_op)
            summary_writer.add_summary(summary_str, step)
            summary_writer.flush()

        if (step % 500 == 0 or step + 1 == FLAGS.max_iters) and step != 0:
            checkpoint_path = os.path.join(
                FLAGS.train_dir,
                FLAGS.dataset_name + '_' + FLAGS.network + '_model.ckpt')
            saver.save(sess, checkpoint_path, global_step=step)

        if coord.should_stop():
            coord.request_stop()
            coord.join(threads)

    gc.collect()
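# `restore(sess)` above is assumed to load pretrained backbone weights or resume
# from a previous checkpoint before training. A minimal sketch with tf.train.Saver;
# FLAGS.pretrained_model is a hypothetical flag for the initial weights:
import tensorflow as tf

def restore_sketch(sess):
    checkpoint = tf.train.latest_checkpoint(FLAGS.train_dir)
    if checkpoint is None:
        checkpoint = FLAGS.pretrained_model  # hypothetical flag
    # restore only the variables actually present in the checkpoint
    reader = tf.train.NewCheckpointReader(checkpoint)
    ckpt_vars = set(reader.get_variable_to_shape_map().keys())
    to_restore = [v for v in tf.global_variables() if v.op.name in ckpt_vars]
    tf.train.Saver(var_list=to_restore).restore(sess, checkpoint)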
def encode(gt_boxes, all_anchors, height, width, stride):
    """Match and encode ground truth into learning targets.

    Sampling

    Parameters
    ---------
    gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]
    all_anchors: an array of shape (h, w, A, 4)
    height: height of the feature map
    width:  width of the feature map
    stride: downscale factor w.r.t. the input size, e.g., [4, 8, 16, 32]

    Returns
    --------
    labels: Nx1 array in [0, num_classes]
    bbox_targets: N x 4 regression targets
    bbox_inside_weights: N x 4, in {0, 1}, indicating which class is assigned
    """
    # TODO: speed up this module
    # if all_anchors is None:
    #     all_anchors = anchors_plane(height, width, stride=stride)
    #
    # # anchors, inds_inside, total_anchors
    # border = cfg.FLAGS.allow_border
    # all_anchors = all_anchors.reshape((-1, 4))
    # inds_inside = np.where(
    #     (all_anchors[:, 0] >= -border) &
    #     (all_anchors[:, 1] >= -border) &
    #     (all_anchors[:, 2] < (width * stride) + border) &
    #     (all_anchors[:, 3] < (height * stride) + border))[0]
    # anchors = all_anchors[inds_inside, :]

    all_anchors = all_anchors.reshape([-1, 4])
    anchors = all_anchors
    total_anchors = all_anchors.shape[0]

    # labels = np.zeros((anchors.shape[0], ), dtype=np.float32)
    labels = np.empty((anchors.shape[0], ), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
        # if _DEBUG:
        #     print('gt_boxes shape: ', gt_boxes.shape)
        #     print('anchors shape: ', anchors.shape)
        #     print('overlaps shape: ', overlaps.shape)

        gt_assignment = overlaps.argmax(axis=1)  # (A)
        max_overlaps = overlaps[np.arange(total_anchors), gt_assignment]
        gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]

        labels[max_overlaps < cfg.FLAGS.rpn_bg_threshold] = 0

        if True:  # this is sensitive to boxes with little overlap; no need!
            # gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
            # fg label: for each gt, hard-assign the anchor with highest overlap regardless of its IoU
            labels[gt_argmax_overlaps] = 1
            # exclude examples with little overlap  # added later
            # excludes = np.where(gt_max_overlaps < cfg.FLAGS.bg_threshold)[0]
            # labels[gt_argmax_overlaps[excludes]] = -1

            if _DEBUG:
                min_ov = np.min(gt_max_overlaps)
                max_ov = np.max(gt_max_overlaps)
                mean_ov = np.mean(gt_max_overlaps)
                if min_ov < cfg.FLAGS.bg_threshold:
                    LOG('ANCHOREncoder: overlaps: (min %.3f mean:%.3f max:%.3f), stride: %d, shape:(h:%d, w:%d)' %
                        (min_ov, mean_ov, max_ov, stride, height, width))
                    worst = gt_boxes[np.argmin(gt_max_overlaps)]
                    anc = anchors[gt_argmax_overlaps[np.argmin(gt_max_overlaps)], :]
                    LOG('ANCHOREncoder: worst case: overlap: %.3f, box:(%.1f, %.1f, %.1f, %.1f %d), anchor:(%.1f, %.1f, %.1f, %.1f)' %
                        (min_ov, worst[0], worst[1], worst[2], worst[3], worst[4],
                         anc[0], anc[1], anc[2], anc[3]))

        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.FLAGS.rpn_fg_threshold] = 1
        # print(np.min(labels), np.max(labels))

        # subsample positive labels if there are too many
        num_fg = int(cfg.FLAGS.fg_rpn_fraction * cfg.FLAGS.rpn_batch_size)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = np.random.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            labels[disable_inds] = -1
    else:
        # if there is no gt
        labels[:] = 0

    # TODO: mild hard negative mining
    # subsample negative labels if there are too many
    num_fg = np.sum(labels == 1)
    num_bg = max(min(cfg.FLAGS.rpn_batch_size - num_fg, num_fg * 3), 8)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets = _compute_targets(anchors, gt_boxes[gt_assignment, :])
    bbox_inside_weights = np.zeros((total_anchors, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = 0.1

    # # mapping to whole outputs
    # labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    # bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    # bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)

    labels = labels.reshape((1, height, width, -1))
    bbox_targets = bbox_targets.reshape((1, height, width, -1))
    bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, -1))

    return labels, bbox_targets, bbox_inside_weights
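# `anchors_plane` (referenced in the commented-out block above) generates the
# (h, w, A, 4) anchor grid for one feature level. A minimal sketch, assuming the A
# base anchors are given as an (A, 4) array of [x1, y1, x2, y2] offsets around the
# origin (e.g., from the usual scale/ratio enumeration):
import numpy as np

def anchors_plane_sketch(height, width, stride, base_anchors):
    """Tile (A, 4) base anchors over a height x width grid with the given stride."""
    shift_x = np.arange(width) * stride
    shift_y = np.arange(height) * stride
    sx, sy = np.meshgrid(shift_x, shift_y)  # each of shape (height, width)
    shifts = np.stack((sx, sy, sx, sy), axis=-1).astype(np.float32)  # (h, w, 4)
    # broadcast (h, w, 1, 4) + (A, 4) -> (h, w, A, 4)
    return shifts[:, :, np.newaxis, :] + base_anchors[np.newaxis, np.newaxis, :, :]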
def encode(gt_boxes, all_anchors):
    """Single-shot sampling.

    Parameters
    ---------
    gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]
    all_anchors: an array of shape (h, w, A, 4)

    Returns
    --------
    labels: Nx1 array in [-1, num_classes]; negative labels are ignored
    bbox_targets: N x 4 regression targets
    bbox_inside_weights: N x 4, in {0, 1}, indicating which class is assigned
    """
    all_anchors = all_anchors.reshape([-1, 4])
    anchors = all_anchors
    total_anchors = all_anchors.shape[0]
    bbox_flags_ = np.zeros([total_anchors], dtype=np.int32)

    if gt_boxes.size > 0:
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))

        gt_assignment = overlaps.argmax(axis=1)  # (A)
        max_overlaps = overlaps[np.arange(total_anchors), gt_assignment]
        gt_argmax_overlaps = overlaps.argmax(axis=0)  # (G)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        labels = gt_boxes[gt_assignment, 4]
        labels[max_overlaps < cfg.rpn_bg_threshold] = 0
        labels[np.logical_and(max_overlaps < cfg.rpn_fg_threshold,
                              max_overlaps >= cfg.rpn_bg_threshold)] = -1
        bbox_flags_[max_overlaps >= 0.5] = 1

        # fg label: for each gt, hard-assign the anchor with highest overlap regardless of its IoU
        labels[gt_argmax_overlaps] = gt_boxes[gt_assignment[gt_argmax_overlaps], 4]
        # bbox_flags_[gt_argmax_overlaps] = 1

        # if clobbering positives: there may be positive objects (jaccard overlap < bg_th)
        # that are not assigned to any anchor
        if cfg.rpn_clobber_positives:
            labels[max_overlaps < cfg.rpn_bg_threshold] = 0
        bbox_flags_[labels >= 1] = 1

        if _DEBUG:
            min_ov = np.min(gt_max_overlaps)
            max_ov = np.max(gt_max_overlaps)
            mean_ov = np.mean(gt_max_overlaps)
            if min_ov < cfg.rpn_bg_threshold:
                LOG('ANCHORSS: overlaps: (min %.3f mean:%.3f max:%.3f)' %
                    (min_ov, mean_ov, max_ov))
                worst = gt_boxes[np.argmin(gt_max_overlaps)]
                anc = anchors[gt_argmax_overlaps[np.argmin(gt_max_overlaps)], :]
                LOG('ANCHORSS: worst overlap:%.3f, box:(%.1f, %.1f, %.1f, %.1f %d), anchor:(%.1f, %.1f, %.1f, %.1f)' %
                    (min_ov, worst[0], worst[1], worst[2], worst[3], worst[4],
                     anc[0], anc[1], anc[2], anc[3]))

        ## handle ignored regions (the gt_class of crowd boxes is set to -1)
        ignored_inds = np.where(gt_boxes[:, -1] < 0)[0]
        if ignored_inds.size > 0:
            ignored_areas = gt_boxes[ignored_inds, :]
            # intersecs shape is D x A
            intersecs = cython_bbox.bbox_intersections(
                np.ascontiguousarray(ignored_areas, dtype=np.float),
                np.ascontiguousarray(anchors, dtype=np.float))
            intersecs_ = intersecs.sum(axis=0)  # A x 1
            labels[intersecs_ > cfg.ignored_area_intersection_fraction] = -1
            bbox_flags_[intersecs_ > cfg.ignored_area_intersection_fraction] = 0
    else:
        # if there is no gt
        labels = np.zeros([total_anchors], dtype=np.float32)

    label_weights = np.zeros((total_anchors, ), dtype=np.float32)
    if cfg.rpn_sample_strategy == 'traditional':
        # subsample positive labels if there are too many, inherited from fast-rcnn
        num_fg = int(cfg.rpn_fg_fraction * cfg.rpn_batch_size)
        fg_inds = np.where(labels >= 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = np.random.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            labels[disable_inds] = -1
        else:
            num_fg = len(fg_inds)
        # subsample negative labels if there are too many
        num_bg = max(min(cfg.rpn_batch_size - num_fg, num_fg * 5), 128)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = np.random.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            labels[disable_inds] = -1
    elif cfg.rpn_sample_strategy == 'simple':
        # use label_weights to balance example losses
        fg_inds = np.where(labels >= 1)[0]
        num_fg = len(fg_inds)
        label_weights[fg_inds] = 1.0
        bg_inds = np.where(labels == 0)[0]
        num_bg = len(bg_inds)
        label_weights[bg_inds] = 3 * max(num_fg, 1.0) / max(max(num_bg, num_fg), 1.0)
    elif cfg.rpn_sample_strategy == 'advanced':
        # not implemented yet
        # TODO: deal with ignored labels?
        pass
    else:
        raise ValueError('RPN sample strategy %s has not been implemented yet'
                         % cfg.rpn_sample_strategy)

    # if True:  # person only
    #     nonperson_inds = np.where(np.logical_and(labels != 1, labels != -1))[0]
    #     labels[nonperson_inds] = 0
    #     label_weights[nonperson_inds] = 0
    #     kept_inds = np.random.choice(nonperson_inds, size=(1000), replace=False)
    #     label_weights[kept_inds] = 0.02

    bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets = _compute_targets(anchors, gt_boxes[gt_assignment, :])
    bbox_inside_weights = np.zeros((total_anchors, 4), dtype=np.float32)
    # bbox_inside_weights[labels >= 1, :] = np.asarray(cfg.bbweights, dtype=np.float32)
    bbox_inside_weights[bbox_flags_ == 1, :] = np.asarray(cfg.bbweights, dtype=np.float32)

    labels = labels.reshape((-1, ))
    bbox_targets = bbox_targets.reshape((-1, 4))
    bbox_inside_weights = bbox_inside_weights.reshape((-1, 4))

    return (labels.astype(np.float32), label_weights,
            bbox_targets.astype(np.float32), bbox_inside_weights.astype(np.float32))
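# A toy call to the single-shot encoder above, handy for checking label/target
# shapes. A sketch only: it assumes the project's cfg (rpn_bg_threshold,
# rpn_fg_threshold, rpn_sample_strategy, bbweights, ...) is configured and that
# _compute_targets/cython_bbox are available.
import numpy as np

def _demo_ss_encode():
    A, h, w, stride = 3, 4, 4, 16
    # crude anchor grid: one 32x32 box per cell, repeated A times
    ys, xs = np.meshgrid(np.arange(h) * stride, np.arange(w) * stride, indexing='ij')
    boxes = np.stack([xs, ys, xs + 32, ys + 32], axis=-1).astype(np.float32)
    all_anchors = np.repeat(boxes[:, :, np.newaxis, :], A, axis=2)  # (h, w, A, 4)
    gt_boxes = np.array([[8, 8, 40, 40, 1]], dtype=np.float32)
    labels, label_weights, bbox_targets, bbox_inside_weights = encode(gt_boxes, all_anchors)
    print(labels.shape, bbox_targets.shape)  # (48,) (48, 4)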
def encode(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width):
    """Encode mask ground truth into learnable targets and sample some examples.

    Params
    ------
    gt_masks: {0, 1} matrices of image_height x image_width, of shape (G, imh, imw)
              # actually modified by me: gt_masks is of shape (G, imh, imw, 7)
    gt_boxes: ground-truth boxes of shape (G, 5), each row is [x1, y1, x2, y2, class]
    rois:     the bounding boxes, of shape (N, 4)
    ## scores: scores of shape (N, 1)
    num_classes: K
    mask_height, mask_width: height and width of output masks

    Returns
    -------
    # rois: boxes sampled for cropping masks, of shape (M, 4)
    labels: class ids of shape (M, 1)
    mask_targets: learning targets of shape (M, pooled_height, pooled_width, K) with {0, 1} values
    mask_inside_weights: of shape (M, pooled_height, pooled_width, K) in {0, 1}, indicating which masks are sampled
    """
    total_masks = rois.shape[0]
    if gt_boxes.size > 0:
        # N x G overlap matrix
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(rois[:, 0:4], dtype=np.float),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
        gt_assignment = overlaps.argmax(axis=1)  # shape is N
        max_overlaps = overlaps[np.arange(len(gt_assignment)), gt_assignment]  # N
        # note: this would assign every roi a positive label
        # labels = gt_boxes[gt_assignment, 4]  # N
        labels = np.zeros((total_masks, ), np.float32)
        labels[:] = -1

        # sample positive rois whose intersection is more than 0.5
        keep_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
        num_masks = int(min(keep_inds.size, cfg.FLAGS.masks_per_image))
        if keep_inds.size > 0 and num_masks < keep_inds.size:
            keep_inds = np.random.choice(keep_inds, size=num_masks, replace=False)
        LOG('Masks: %d of %d rois are considered positive mask. Number of masks %d'
            % (num_masks, rois.shape[0], gt_masks.shape[0]))
        labels[keep_inds] = gt_boxes[gt_assignment[keep_inds], -1]

        # rois = rois[inds]
        # labels = labels[inds].astype(np.int32)
        # gt_assignment = gt_assignment[inds]

        # ignore rois with overlaps between fg_threshold and bg_threshold;
        # masks are only defined on positive rois
        ignore_inds = np.where((max_overlaps < cfg.FLAGS.fg_threshold))[0]
        labels[ignore_inds] = -1

        mask_targets = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.int32)
        mask_inside_weights = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.float32)
        rois[rois < 0] = 0

        # TODO: speed bottleneck?
        for i in keep_inds:
            roi = rois[i, :4]
            for x in range(7):
                cropped = gt_masks[gt_assignment[i],
                                   int(roi[1]):int(roi[3]) + 1,
                                   int(roi[0]):int(roi[2]) + 1,
                                   x]
                cropped = cv2.resize(cropped, (mask_width, mask_height),
                                     interpolation=cv2.INTER_NEAREST)
                mask_targets[i, :, :, x] = cropped
                mask_inside_weights[i, :, :, x] = 1
    else:
        # there is no gt
        labels = np.zeros((total_masks, ), np.float32)
        labels[:] = -1
        mask_targets = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.int32)
        mask_inside_weights = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.float32)

    # np.save("/home/czurini/Alex/rois.npy", rois)
    # np.save("/home/czurini/Alex/mask_targets.npy", mask_targets)
    return labels, mask_targets, mask_inside_weights