def _compute_targets(rois, overlaps, labels):
    """Compute bounding-box regression targets for an image.

    Args:
        rois: 2-D array with one window per row.
        overlaps: per-ROI overlap values. Entries equal to 1 are treated as
            ground-truth ROIs; entries >= cfg.TRAIN.TWIN_THRESH are the
            examples that receive a regression target.
        labels: per-ROI labels, indexed with the example indices.

    Returns:
        float32 array of shape (rois.shape[0], 3). For every example row,
        column 0 holds its label and columns 1: hold the output of
        twin_transform(example, matched ground truth). All other rows are
        zero; the whole array is zero when there is no ground-truth ROI.
    """
    targets = np.zeros((rois.shape[0], 3), dtype=np.float32)

    # Indices of ground-truth ROIs
    gt_inds = np.where(overlaps == 1)[0]
    if gt_inds.size == 0:
        # Bail if the image has no ground-truth ROIs
        return targets

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.TWIN_THRESH)[0]
    ex_rois = rois[ex_inds, :]

    # Overlap between each example ROI and each ground-truth ROI.
    # np.float64 instead of the np.float alias, which NumPy >= 1.24 removed.
    ex_gt_overlaps = twin_overlaps(
        np.ascontiguousarray(ex_rois, dtype=np.float64),
        np.ascontiguousarray(rois[gt_inds, :], dtype=np.float64))

    # The ground-truth ROI with the highest overlap becomes the example's
    # regression target.
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]

    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = twin_transform(ex_rois, gt_rois)
    return targets
def _sample_positive_rois(all_rois, gt_wins, captions, fc_features):
    """Select the foreground RoIs together with their captions and features.

    Each RoI is matched to the ground-truth window it overlaps most. RoIs
    whose best overlap is >= cfg.TRAIN.CAPTION_FG_THRESH are kept; nothing is
    sampled randomly and background RoIs are never returned.

    Args:
        all_rois: 2-D array of RoIs; columns 1:3 are used as the window.
        gt_wins: 2-D array of ground-truth windows; columns :2 are used.
        captions: array indexed as captions[gt, slot, :], where slot 0 feeds
            input_sent, slot 1 cont_sent and slot 2 target_sent.
        fc_features: per-RoI features, indexed by RoI along axis 0.

    Returns:
        Tuple (cont_sent, input_sent, target_sent, fc_features, rois,
        keep_inds). Each sentence array has one column per kept RoI; the
        remaining outputs are the inputs restricted to the kept RoIs, and
        keep_inds are the kept row indices into all_rois.
    """
    # overlaps: (rois x gt_wins). np.float64 replaces the np.float alias,
    # which NumPy >= 1.24 removed.
    overlaps = twin_overlaps(
        np.ascontiguousarray(all_rois[:, 1:3], dtype=np.float64),
        np.ascontiguousarray(gt_wins[:, :2], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    num_rois = gt_assignment.shape[0]

    def _sentences(slot):
        # Caption data of the matched ground truth, flattened per RoI and
        # transposed so that RoIs run along axis 1.
        per_roi = captions[gt_assignment, slot, :].reshape((num_rois, -1))
        return per_roi.transpose((1, 0))

    input_sent = _sentences(0)
    cont_sent = _sentences(1)
    target_sent = _sentences(2)

    # Foreground RoIs: best overlap at or above the caption threshold.
    keep_inds = np.where(max_overlaps >= cfg.TRAIN.CAPTION_FG_THRESH)[0]

    rois = all_rois[keep_inds]
    fc_features = fc_features[keep_inds, :]
    input_sent = input_sent[:, keep_inds]
    cont_sent = cont_sent[:, keep_inds]
    target_sent = target_sent[:, keep_inds]

    return cont_sent, input_sent, target_sent, fc_features, rois, keep_inds
def _sample_rois(all_rois, gt_wins, fg_rois_per_image, rois_per_image,
                 num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: 2-D array of RoIs; columns 1:3 are used as the window.
        gt_wins: 2-D array of ground-truth windows; columns :2 are the
            window and column 2 is read as the label.
        fg_rois_per_image: maximum number of foreground RoIs to draw.
        rois_per_image: total number of RoIs wanted (foreground plus
            background).
        num_classes: forwarded to _get_twin_regression_labels.

    Returns:
        Tuple (labels, rois, twin_targets, twin_inside_weights) for the
        sampled RoIs, foreground first. Labels of background RoIs are 0.
    """
    # overlaps: (rois x gt_wins). np.float64 replaces the np.float alias,
    # which NumPy >= 1.24 removed.
    overlaps = twin_overlaps(
        np.ascontiguousarray(all_rois[:, 1:3], dtype=np.float64),
        np.ascontiguousarray(gt_wins[:, :2], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_wins[gt_assignment, 2]

    # Foreground RoIs: best overlap >= FG_THRESH.
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against an image with fewer foreground RoIs than requested.
    # Cast to int: the count is used as a sample size and a slice bound, and
    # current NumPy rejects floats for both.
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image,
                             replace=False)

    # Background RoIs: best overlap within [BG_THRESH_LO, BG_THRESH_HI).
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Fill the remainder of the batch with background, guarding against
    # there being fewer than desired.
    bg_rois_per_this_image = int(min(rois_per_image - fg_rois_per_this_image,
                                     bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image,
                             replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)

    # Fancy indexing copies, so zeroing below does not touch gt_wins.
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    # NOTE(review): the second argument here is the matched ground-truth
    # windows, while the _compute_targets visible in this file names its
    # second parameter `overlaps` -- confirm which definition is in scope.
    twin_target_data = _compute_targets(
        rois[:, 1:3], gt_wins[gt_assignment[keep_inds], :2], labels)

    twin_targets, twin_inside_weights = \
        _get_twin_regression_labels(twin_target_data, num_classes)

    return labels, rois, twin_targets, twin_inside_weights
def _sample_all_rois(all_rois, gt_wins, num_classes):
    """Build labels and regression targets for every RoI, without sampling.

    Args:
        all_rois: 2-D array of RoIs; columns 1:3 are used as the window.
        gt_wins: 2-D array of ground-truth windows; columns :2 are the
            window and column 2 is read as the label.
        num_classes: forwarded to _get_twin_regression_labels.

    Returns:
        Tuple (labels, rois, twin_targets, twin_inside_weights), where rois
        is all_rois itself and labels are those of the best-overlapping
        ground-truth window for each RoI.
    """
    # overlaps: (rois x gt_wins). np.float64 replaces the np.float alias,
    # which NumPy >= 1.24 removed.
    overlaps = twin_overlaps(
        np.ascontiguousarray(all_rois[:, 1:3], dtype=np.float64),
        np.ascontiguousarray(gt_wins[:, :2], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    labels = gt_wins[gt_assignment, 2]
    rois = all_rois

    # NOTE(review): the second argument here is the matched ground-truth
    # windows, while the _compute_targets visible in this file names its
    # second parameter `overlaps` -- confirm which definition is in scope.
    twin_target_data = _compute_targets(
        rois[:, 1:3], gt_wins[gt_assignment, :2], labels)

    twin_targets, twin_inside_weights = \
        _get_twin_regression_labels(twin_target_data, num_classes)

    return labels, rois, twin_targets, twin_inside_weights
def forward(self, bottom, top):
    """Assign labels, regression targets and loss weights to every anchor.

    Algorithm:
      1. shift the base anchors to each position along the `length` axis
      2. drop anchors that fall outside the allowed border
      3. label anchors by their overlap with the ground-truth boxes
      4. subsample foreground / background labels to the RPN batch size
      5. compute regression targets and inside / outside weights
      6. map everything back onto the full anchor set and fill the tops

    Args:
        bottom: bottom[0].data provides the (length, height, width) map size
            and must have batch size 1; bottom[1].data holds the GT boxes;
            bottom[2].data.shape[2] bounds the anchors on the right.
        top: top[0] receives the labels (1 positive, 0 negative, -1 ignored),
            top[1] the twin targets, top[2] the inside weights and top[3]
            the outside weights. Each blob is reshaped before being filled.
    """

    def _say(*parts):
        # Emits the same text as the Python 2 statement `print a, b, ...`
        # while remaining valid syntax on Python 3.
        print(' '.join(str(part) for part in parts))

    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    length, height, width = bottom[0].data.shape[-3:]
    # GT boxes (x1, x2, label)
    gt_boxes = bottom[1].data

    if DEBUG:
        _say('')
        _say('length, height, width: ({}, {}, {})'.format(
            length, height, width))
        _say('rpn: gt_boxes.shape', gt_boxes.shape)
        _say('rpn: gt_boxes', gt_boxes)

    # 1. Generate shifted anchors: one shift per position along `length`.
    shifts = np.arange(0, length) * self._feat_stride
    # add A anchors (1, A, 2) to
    # cell K shifts (K, 1, 1) to get
    # shift anchors (K, A, 2)
    # reshape to (K*A, 2) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    all_anchors = (self._anchors.reshape((1, A, 2)) +
                   shifts.reshape((1, K, 1)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 2))
    total_anchors = int(K * A)

    # only keep anchors inside the allowed range
    inds_inside = np.where(
        (all_anchors[:, 0] >= -self._allowed_border) &
        (all_anchors[:, 1] <
         bottom[2].data.shape[2] + self._allowed_border)  # length
    )[0]
    num_inside = len(inds_inside)

    if DEBUG:
        _say('total_anchors', total_anchors)
        _say('inds_inside', num_inside)

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        _say('anchors.shape', anchors.shape)
        _say('anchors', anchors)

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((num_inside, ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes: (ex, gt).
    # np.float64 replaces the np.float alias, which NumPy >= 1.24 removed.
    overlaps = twin_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # best gt per anchor
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
    # best anchor(s) per gt; np.where keeps every anchor tied for the max
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
    if DEBUG:
        _say("max_overlaps", max_overlaps)
        _say("gt_max_overlaps", gt_max_overlaps)

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # NOTE(review): this call passes two arguments; the _compute_targets
    # visible elsewhere in this file takes three (rois, overlaps, labels).
    # Presumably a two-argument (ex_rois, gt_rois) version is in scope for
    # this layer -- confirm.
    twin_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])
    if DEBUG:
        _say("twin_targets", twin_targets)

    # inside weights: only positive anchors contribute to the regression
    twin_inside_weights = np.zeros((num_inside, 2), dtype=np.float32)
    twin_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_TWIN_INSIDE_WEIGHTS)

    twin_outside_weights = np.zeros((num_inside, 2), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 2)) * 1.0 / num_examples
        negative_weights = np.ones((1, 2)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    twin_outside_weights[labels == 1, :] = positive_weights
    twin_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        # running statistics of the positive targets
        self._sums += twin_targets[labels == 1, :].sum(axis=0)
        self._squared_sums += (twin_targets[labels == 1, :] ** 2).sum(axis=0)
        self._counts += np.sum(labels == 1)
        means = self._sums / self._counts
        stds = np.sqrt(self._squared_sums / self._counts - means ** 2)
        _say('means:')
        _say(means)
        _say('stdevs:')
        _say(stds)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    twin_targets = _unmap(twin_targets, total_anchors, inds_inside, fill=0)
    twin_inside_weights = _unmap(
        twin_inside_weights, total_anchors, inds_inside, fill=0)
    twin_outside_weights = _unmap(
        twin_outside_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        _say('rpn: max max_overlap', np.max(max_overlaps))
        _say('rpn: num_positive', np.sum(labels == 1))
        _say('rpn: num_negative', np.sum(labels == 0))
        self._fg_sum += np.sum(labels == 1)
        self._bg_sum += np.sum(labels == 0)
        self._count += 1
        _say('rpn: num_positive avg', self._fg_sum / self._count)
        _say('rpn: num_negative avg', self._bg_sum / self._count)

    # NOTE(review): these two prints repeat the DEBUG ones above and are
    # treated here as unconditional monitoring output; confirm that they
    # were not meant to sit inside the DEBUG block.
    _say('rpn: num_positive', np.sum(labels == 1))
    _say('rpn: num_negative', np.sum(labels == 0))

    # labels
    # There are length * A anchors in total, so this reshape only succeeds
    # when height * width == 1.
    labels = labels.reshape(
        (1, length, height, width, A)).transpose(0, 4, 1, 2, 3)
    labels = labels.reshape((1, 1, A * length, height, width))
    top[0].reshape(*labels.shape)
    top[0].data[...] = labels

    # twin_targets
    twin_targets = twin_targets \
        .reshape((1, length, height, width, A * 2)).transpose(0, 4, 1, 2, 3)
    top[1].reshape(*twin_targets.shape)
    top[1].data[...] = twin_targets

    # twin_inside_weights
    twin_inside_weights = twin_inside_weights \
        .reshape((1, length, height, width, A * 2)).transpose(0, 4, 1, 2, 3)
    assert twin_inside_weights.shape[3] == height
    assert twin_inside_weights.shape[4] == width
    top[2].reshape(*twin_inside_weights.shape)
    top[2].data[...] = twin_inside_weights

    # twin_outside_weights
    twin_outside_weights = twin_outside_weights \
        .reshape((1, length, height, width, A * 2)).transpose(0, 4, 1, 2, 3)
    assert twin_outside_weights.shape[3] == height
    assert twin_outside_weights.shape[4] == width
    top[3].reshape(*twin_outside_weights.shape)
    top[3].data[...] = twin_outside_weights