# NOTE: assumed imports for this snippet (standard library + TF 1.x slim);
# project-local names (cpp, aardvark, FLAGS, PRIORS, PIXEL_MEANS, mask_net,
# anchors2boxes, shift_boxes, normalize_boxes) come from the surrounding repo.
import os
import math
import tensorflow as tf
import tensorflow.contrib.slim as slim


def __init__ (self, min_size=1):
    super().__init__()
    self.gt_matcher = cpp.GTMatcher(FLAGS.match_th, FLAGS.max_masks, min_size)
    self.priors = []
    if os.path.exists(FLAGS.priors):
        with open(FLAGS.priors, 'r') as f:
            for l in f:
                if l[0] == '#':
                    continue
                s, r = l.strip().split(' ')
                s, r = float(s), float(r)
                # w * h = s * s
                # w / h = r
                w = math.sqrt(s * s * r)
                h = math.sqrt(s * s / r)
                self.priors.append([w, h])
                pass
            pass
        pass
    aardvark.print_red("PRIORS %s" % str(self.priors))
    # TODO: need a better way to generalize this to multiple priors and 0 priors
    self.n_priors = len(self.priors)
    if self.n_priors == 0:
        self.n_priors = 1
        pass
def __init__(self, min_size=1):
    super().__init__()
    self.gt_matcher = cpp.GTMatcher(FLAGS.match_th, FLAGS.max_masks, min_size)
    self.priors = []
    if os.path.exists('priors'):
        with open('priors', 'r') as f:
            for l in f:
                if l[0] == '#':
                    continue
                s, r = l.strip().split(' ')
                s, r = float(s), float(r)
                # w * h = s * s
                # w / h = r
                w = math.sqrt(s * s * r)
                h = math.sqrt(s * s / r)
                self.priors.append([w, h])
                pass
            pass
        pass
    self.n_priors = len(self.priors)
    if self.n_priors == 0:
        self.n_priors = 1
        pass
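# Illustration (not part of the original code): the prior files parsed above
# store one "scale aspect_ratio" pair per line, and each pair is converted to
# a box shape by solving w * h = s * s and w / h = r, i.e.
#   w = s * sqrt(r),  h = s / sqrt(r).
# A minimal standalone sketch of that conversion, assuming the same two-column
# text format (the helper name parse_priors is hypothetical):
def parse_priors(path):
    priors = []
    with open(path, 'r') as f:
        for line in f:
            if line.startswith('#'):
                continue    # skip comment lines, as above
            s, r = map(float, line.strip().split(' '))
            priors.append([s * math.sqrt(r), s / math.sqrt(r)])
    return priors
# Example: the line "32 2" yields w ~ 45.25, h ~ 22.63 (area stays 32 * 32).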
def __init__(self, priors=1):
    self.priors = priors    # number of priors
    self.gt_matcher = cpp.GTMatcher(FLAGS.match_th, FLAGS.max_masks)
    pass
def __init__ (self, min_size=1):
    super().__init__()
    self.gt_matcher = cpp.GTMatcher(FLAGS.match_th, FLAGS.max_masks, min_size, False)
    pass
def create_model (inputs, backbone_fn):
    #box_ft, mask_ft, gt_masks, gt_anchors, gt_anchors_weight, gt_params, gt_params_weight, gt_boxes, config):
    # ft: B * H' * W' * 3 input feature, H' W' is feature map size
    # gt_counts: B number of boxes in each sample of the batch
    # gt_boxes: ? * 4 boxes
    bb, _ = backbone_fn(inputs.X - PIXEL_MEANS, global_pool=False, output_stride=FLAGS.backbone_stride)
    #bb2, _ = backbone_fn(inputs.X - PIXEL_MEANS, global_pool=False, output_stride=FLAGS.backbone_stride, scope='bb2')
    gt_matcher = cpp.GTMatcher(FLAGS.match_th, FLAGS.max_masks)
    mask_extractor = cpp.MaskExtractor(FLAGS.mask_size, FLAGS.mask_size)
    end_points = {}

    with tf.variable_scope('boxnet'):
        assert FLAGS.backbone_stride % FLAGS.anchor_stride == 0
        ss = FLAGS.backbone_stride // FLAGS.anchor_stride
        # generate anchor feature
        anchor_logits_ft = slim.conv2d_transpose(bb, FLAGS.anchor_logit_filters, ss*2, ss)
        anchor_params_ft = slim.conv2d_transpose(bb, FLAGS.anchor_params_filters, ss*2, ss)

        assert FLAGS.backbone_stride % FLAGS.mask_stride == 0
        ss = FLAGS.backbone_stride // FLAGS.mask_stride
        mask_ft = slim.conv2d_transpose(bb, FLAGS.mask_filters, ss*2, ss)

        anchor_logits = slim.conv2d(anchor_logits_ft, 2 * len(PRIORS), 3, 1, activation_fn=None)
        anchor_logits2 = tf.reshape(anchor_logits, (-1, 2))    # ? * 2
        # anchor probabilities
        anchor_prob = tf.squeeze(tf.slice(tf.nn.softmax(anchor_logits2), [0, 1], [-1, 1]), 1)

        gt_anchors = tf.reshape(inputs.gt_anchors, (-1,))
        gt_anchors_weight = tf.reshape(inputs.gt_anchors_weight, (-1,))
        # anchor cross-entropy
        axe = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=anchor_logits2, labels=gt_anchors)
        axe = axe * gt_anchors_weight
        axe = tf.reduce_sum(axe) / (tf.reduce_sum(gt_anchors_weight) + 1)

        params = slim.conv2d(anchor_params_ft, 4 * len(PRIORS), 3, 1, activation_fn=None)
        params = tf.reshape(params, (-1, 4))    # ? * 4
        gt_params = tf.reshape(inputs.gt_params, (-1, 4))
        gt_params_weight = tf.reshape(inputs.gt_params_weight, (-1,))
        # params loss
        if True:
            dxy, wh = tf.split(params, [2, 2], 1)
            dxy_gt, wh_gt = tf.split(gt_params, [2, 2], 1)
            #wh = tf.log(tf.nn.relu(wh) + 1)
            wh_gt = tf.log(wh_gt + 1)
            pl = tf.losses.huber_loss(dxy, dxy_gt, reduction=tf.losses.Reduction.NONE) + \
                 tf.losses.huber_loss(wh, wh_gt, reduction=tf.losses.Reduction.NONE)
            pl = tf.reduce_sum(pl, axis=1)
            pl = tf.reduce_sum(pl * gt_params_weight) / (tf.reduce_sum(gt_params_weight) + 1)

        # generate boxes from anchor params
        boxes, box_ind = anchors2boxes(tf.shape(anchor_logits_ft), params)
        boxes_pre = boxes

        sel = tf.greater_equal(anchor_prob, inputs.anchor_th)
        # sel is a boolean mask
        # select only boxes with prob > th for nms
        anchor_prob = tf.boolean_mask(anchor_prob, sel)
        boxes = tf.boolean_mask(boxes, sel)
        box_ind = tf.boolean_mask(box_ind, sel)

        sel = tf.image.non_max_suppression(shift_boxes(boxes, box_ind), anchor_prob, 100000, iou_threshold=inputs.nms_th)
        # sel is a list of indices
        if True:    # prediction head, not used in training
            psel = tf.slice(sel, [0], [tf.minimum(inputs.nms_max, tf.shape(sel)[0])])
            boxes_predicted = tf.gather(boxes, psel)
            box_ind_predicted = tf.gather(box_ind, psel)
            mlogits = mask_net(inputs.X, mask_ft, boxes_predicted, box_ind_predicted)
            masks_predicted = tf.squeeze(tf.slice(tf.nn.softmax(mlogits), [0, 0, 0, 1], [-1, -1, -1, 1]), 3)
            pass
        anchor_prob = None  # discard

        boxes = tf.gather(boxes, sel)
        box_ind = tf.gather(box_ind, sel)

        hit, index, gt_index = tf.py_func(gt_matcher.apply, [boxes, box_ind, inputs.gt_boxes], [tf.float32, tf.int32, tf.int32])
        # % boxes found
        precision = hit / tf.cast(tf.shape(boxes)[0] + 1, tf.float32)
        recall = hit / tf.cast(tf.shape(inputs.gt_boxes)[0] + 1, tf.float32)

        boxes = tf.gather(boxes, index)
        box_ind = tf.gather(box_ind, index)
        gt_boxes = tf.gather(inputs.gt_boxes, gt_index)
        # normalize boxes to [0-1]
        nboxes = normalize_boxes(tf.shape(inputs.X), boxes)

        mlogits = mask_net(inputs.X, mask_ft, boxes, box_ind)
        gt_masks, = tf.py_func(mask_extractor.apply, [inputs.gt_masks, gt_boxes, boxes], [tf.float32])
        #gt_masks, = tf.py_func(mask_extractor.apply, [inputs.gt_masks, gt_boxes, tf.slice(gt_boxes, [0, 3], [-1, 4])], [tf.float32])
        end_points['gt_boxes'] = gt_boxes
        end_points['boxes'] = boxes
        gt_masks = tf.cast(tf.round(gt_masks), tf.int32)
        end_points['gt_masks'] = gt_masks
        # mask cross entropy
        mxe = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=mlogits, labels=gt_masks)
        mxe = tf.reshape(mxe, (-1,))
        mxe = tf.reduce_sum(mxe) / tf.cast(tf.shape(mxe)[0] + 1, tf.float32)
        pass

    #tf.identity(logits, name='logits')
    #tf.identity(params, name='params')
    #tf.identity(boxes_pre, name='boxes_pre')
    tf.identity(boxes_predicted, name='boxes')
    tf.identity(masks_predicted, name='masks')
    #tf.identity(mlogits, name='mlogits')

    axe = tf.identity(axe, name='ax')   # cross-entropy
    mxe = tf.identity(mxe, name='mx')   # cross-entropy
    pl = tf.identity(pl * FLAGS.pl_weight, name='pl')   # params-loss
    reg = tf.identity(tf.reduce_sum(tf.losses.get_regularization_losses()) * FLAGS.re_weight, name='re')
    precision = tf.identity(precision, name='p')
    recall = tf.identity(recall, name='r')
    loss = tf.identity(axe + mxe + pl + reg, name='lo')
    return loss, [axe, mxe, pl, reg, precision, recall], end_points
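# Illustration (hypothetical wiring, not part of the original code): how
# create_model might be combined with a slim backbone and an optimizer.
# `inputs` is assumed to expose the tensors create_model reads (X, gt_anchors,
# gt_anchors_weight, gt_params, gt_params_weight, gt_boxes, gt_masks,
# anchor_th, nms_th, nms_max); the helper name build_train_op is made up here.
def build_train_op (inputs, learning_rate=1e-4):
    import functools
    from tensorflow.contrib.slim.nets import resnet_v2
    # resnet_v2_50 accepts the global_pool/output_stride kwargs that
    # create_model passes to backbone_fn
    backbone = functools.partial(resnet_v2.resnet_v2_50, is_training=True)
    loss, metrics, end_points = create_model(inputs, backbone)
    global_step = tf.train.get_or_create_global_step()
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)
    return train_op, loss, metrics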