def forward(self, input):
    """
    Forward pass of the function.

    With ``gate = F.max(self.a, F.min(self.b, (input + 1) / 2))`` the result is

    * ``self.hard is False``:
      ``swish_function(input, False, False, None, None)`` where ``input >= 0``
      and ``(exp(input) - 1) * sigmoid(input)`` where ``input < 0``;
    * otherwise:
      ``input * gate`` where ``input >= 0`` and ``(exp(input) - 1) * gate``
      where ``input < 0``.

    NOTE(review): these formulas look like ELiSH / HardELiSH; ``self.a`` and
    ``self.b`` are presumably the constants 0 and 1 - confirm against the
    constructor.
    """
    non_negative = (input >= 0).float()
    negative = (input < 0).float()

    # Zero the exponent wherever the negative branch is masked out anyway.
    # Without this, exp() overflows to inf for large positive inputs and the
    # masked product 0 * inf turns the whole output into NaN.
    exp_minus_one = F.exp(input * negative) - 1

    if self.hard is False:
        return non_negative * swish_function(
            input, False, False, None, None
        ) + negative * exp_minus_one * F.sigmoid(input)

    hard_gate = F.max(self.a, F.min(self.b, (input + 1.0) / 2.0))
    # The negative half uses exp(x) - 1, same as the soft branch; the previous
    # code computed exp(x - 1), which does not vanish at x = 0.
    return non_negative * input * hard_gate + negative * (
        exp_minus_one * hard_gate
    )
def forward(self, a):
    """Reduce ``a`` along axis 2 with the reduction selected by ``self.mode``.

    ``"sum"`` uses ``F.sum`` and ``"mean"`` uses ``F.mean``; any other mode
    value falls through to ``F.max``.
    """
    mode = self.mode
    if mode == "sum":
        reduce_fn = F.sum
    elif mode == "mean":
        reduce_fn = F.mean
    else:
        reduce_fn = F.max
    return reduce_fn(a, axis=2)
def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45):
    """Threshold raw detections by confidence and apply NMS image by image.

    Channels 0-3 of ``prediction`` are treated as (center_x, center_y, width,
    height) and rewritten as corner coordinates.  Channel 4 is multiplied by
    the best score among channels ``5 : 5 + num_classes`` to obtain the
    confidence that is compared with ``conf_thre`` and passed to NMS.

    NOTE: ``prediction`` is modified in place (its first four channels are
    overwritten with the corner-form boxes), exactly as before.

    Args:
        prediction: tensor indexed as ``[image, box, channel]``.
        num_classes: number of class-score channels following channel 4.
        conf_thre: minimum ``channel4 * best_class_score`` to keep a box.
        nms_thre: threshold forwarded to ``F.vision.nms``.

    Returns:
        A list with one entry per image: ``None`` when no box survives,
        otherwise a tensor whose columns are the four corner coordinates,
        channel 4, the best class score and the best class index.
    """
    box_corner = F.zeros_like(prediction)
    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_corner[:, :, :4]

    output = [None] * len(prediction)
    for i, image_pred in enumerate(prediction):
        # If none are remaining => process next image
        if not image_pred.shape[0]:
            continue

        # Get score and class with highest confidence
        class_scores = image_pred[:, 5 : 5 + num_classes]
        class_conf = F.max(class_scores, 1, keepdims=True)
        class_pred = F.argmax(class_scores, 1, keepdims=True)

        # Squeeze only the kept class axis so a single-row input stays 1-D.
        conf_mask = image_pred[:, 4] * F.squeeze(class_conf, axis=1) >= conf_thre
        detections = F.concat((image_pred[:, :5], class_conf, class_pred), 1)
        detections = detections[conf_mask]
        if not detections.shape[0]:
            continue

        nms_out_index = F.vision.nms(
            detections[:, :4],
            detections[:, 4] * detections[:, 5],
            nms_thre,
        )
        # Each image index is visited exactly once, so output[i] is always
        # still None here; the old "append to existing" branch was dead code.
        output[i] = detections[nms_out_index]

    return output
def fpn_anchor_target_opr_core_impl(gt_boxes, im_info, anchors,
                                    allow_low_quality_matches=True):
    """Assign a label and a box-regression target to every anchor.

    Labels start as ``config.ignore_label``; anchors whose best overlap is
    below ``config.rpn_negative_overlap`` become 0, and anchors whose best
    overlap reaches ``config.rpn_positive_overlap`` become 1.  With
    ``allow_low_quality_matches`` the anchor with the highest overlap for each
    ground-truth box is additionally forced to match that box (its overlap is
    set to 1 before the positive threshold is applied).

    Args:
        gt_boxes: ground-truth rows; columns 0-3 are used as the box and the
            last column must be > 0 for the row to be kept.
        im_info: per-image info; element 5 is used as the number of rows of
            ``gt_boxes`` to read.
        anchors: anchor boxes, one per row.
        allow_low_quality_matches: see above.

    Returns:
        ``(labels, bbox_targets)`` for the anchors.
    """
    ignore_label = config.ignore_label
    # get the gt boxes
    valid_gt_boxes = gt_boxes[:im_info[5], :]
    non_ignore_mask = valid_gt_boxes[:, -1] > 0
    non_ignore_inds = mask_to_inds(non_ignore_mask)
    valid_gt_boxes = valid_gt_boxes.ai[non_ignore_inds]
    # compute the iou matrix
    overlaps = box_overlap_opr(anchors, valid_gt_boxes[:, :4])
    # match the dtboxes
    a_shp0 = anchors.shape[0]
    max_overlaps = F.max(overlaps, axis=1)
    argmax_overlaps = F.argmax(overlaps, axis=1)
    # all ignore
    labels = mge.ones(a_shp0).astype(np.int32) * ignore_label
    # set negative ones: the mask zeroes every anchor below the negative
    # threshold and leaves ignore_label on the rest
    labels = labels * (max_overlaps >= config.rpn_negative_overlap)
    const_one = mge.tensor(1.0)
    if allow_low_quality_matches:
        # match the max gt: for each gt box, force its best anchor to point
        # at it and give that anchor an overlap of 1
        gt_argmax_overlaps = F.argmax(overlaps, axis=0)
        g_shp0 = valid_gt_boxes.shapeof()[0]
        gt_id = F.linspace(0, g_shp0 - 1, g_shp0).astype(np.int32)
        argmax_overlaps = argmax_overlaps.set_ai(gt_id)[gt_argmax_overlaps]
        max_overlaps = max_overlaps.set_ai(
            const_one.broadcast(g_shp0))[gt_argmax_overlaps]
    # set positive ones (computed once, after any low-quality adjustment)
    fg_mask = (max_overlaps >= config.rpn_positive_overlap)
    fg_mask_ind = mask_to_inds(fg_mask)
    labels = labels.set_ai(const_one.broadcast(
        fg_mask_ind.shapeof()))[fg_mask_ind]
    # compute the targets
    bbox_targets = bbox_transform_opr(anchors,
                                      valid_gt_boxes.ai[argmax_overlaps, :4])
    if config.rpn_bbox_normalize_targets:
        std_opr = mge.tensor(config.bbox_normalize_stds[None, :])
        mean_opr = mge.tensor(config.bbox_normalize_means[None, :])
        # targets / std - mean / std  ==  (targets - mean) / std
        minus_opr = mean_opr / std_opr
        bbox_targets = bbox_targets / std_opr - minus_opr
    return labels, bbox_targets
def fpn_anchor_target_opr_core_impl(gt_boxes, im_info, anchors,
                                    allow_low_quality_matches=True):
    """Assign a label and a box-regression target to every anchor.

    Labels start as ``config.ignore_label``; anchors whose best overlap is
    below ``config.rpn_negative_overlap`` become 0, and anchors whose best
    overlap reaches ``config.rpn_positive_overlap`` become 1.  With
    ``allow_low_quality_matches`` the anchor with the highest overlap for each
    ground-truth box is additionally forced to match that box (its overlap is
    set to 1 before the positive threshold is applied).

    Args:
        gt_boxes: ground-truth rows; columns 0-3 are used as the box and
            column 4 is compared with ``config.ignore_label`` to drop rows.
        im_info: per-image info; element 5 is used as the number of rows of
            ``gt_boxes`` to read.
        anchors: anchor boxes, one per row.
        allow_low_quality_matches: see above.

    Returns:
        ``(labels, bbox_targets)`` for the anchors.
    """
    ignore_label = config.ignore_label
    # get the gt boxes
    gtboxes = gt_boxes[:im_info[5].astype(np.int32)]
    ignore_mask = F.equal(gtboxes[:, 4], ignore_label)

    # find the valid gtboxes
    _, index = F.cond_take(1 - ignore_mask > 0, ignore_mask)
    valid_gt_boxes = gtboxes[index.astype(np.int32)]

    # compute the iou matrix
    overlaps = box_overlap_opr(anchors, valid_gt_boxes[:, :4])
    # match the dtboxes
    a_shp0 = anchors.shape[0]
    argmax_overlaps = F.argmax(overlaps, axis=1)
    max_overlaps = F.nn.indexing_one_hot(overlaps,
                                         argmax_overlaps.astype(np.int32), 1)

    labels = F.ones(a_shp0).astype(np.int32) * ignore_label
    # set negative ones: the mask zeroes every anchor below the negative
    # threshold and leaves ignore_label on the rest
    labels = labels * (max_overlaps >= config.rpn_negative_overlap).astype(
        np.float32)

    if allow_low_quality_matches:
        # match the max gt: for each gt box, force its best anchor to point
        # at it and give that anchor an overlap of 1
        gt_argmax_overlaps = F.argmax(overlaps, axis=0).astype(np.int32)
        max_overlaps[gt_argmax_overlaps] = 1.
        # number of gt boxes == number of overlap columns; no reduction is
        # needed just to read that size
        m = overlaps.shape[1]
        argmax_overlaps[gt_argmax_overlaps] = F.linspace(0, m - 1,
                                                         m).astype(np.int32)

    # set positive ones (computed once, after any low-quality adjustment)
    fg_mask = (max_overlaps >= config.rpn_positive_overlap)
    labels[fg_mask] = 1

    # compute the bbox targets
    bbox_targets = bbox_transform_opr(anchors,
                                      valid_gt_boxes[argmax_overlaps, :4])
    if config.rpn_bbox_normalize_targets:
        std_opr = mge.tensor(config.bbox_normalize_stds[None, :]).to(
            anchors.device)
        mean_opr = mge.tensor(config.bbox_normalize_means[None, :]).to(
            anchors.device)
        # targets / std - mean / std  ==  (targets - mean) / std
        minus_opr = mean_opr / std_opr
        bbox_targets = bbox_targets / std_opr - minus_opr
    return labels, bbox_targets
def per_level_gt(self, gt_boxes, im_info, anchors,
                 allow_low_quality_matches=True):
    """Build labels and box-regression targets for one set of anchors.

    Labels start as ``self.cfg.ignore_label``, are zeroed where the best IoU
    is below ``self.cfg.rpn_negative_overlap`` and are set to 1 where it
    reaches ``self.cfg.rpn_positive_overlap``.  When
    ``allow_low_quality_matches`` is true, the best anchor of every
    ground-truth box is first forced to match that box with an IoU of 1.

    The first ``im_info[4]`` rows of ``gt_boxes`` are used; columns 0-3 are
    the box coordinates.  Returns ``(labels, bbox_targets)``.
    """
    cfg = self.cfg

    # ground-truth rows that are actually populated
    boxes = gt_boxes[:im_info[4], :]

    # anchor-vs-gt IoU and the best gt for every anchor
    iou = layers.get_iou(anchors, boxes[:, :4])
    best_iou = F.max(iou, axis=1)
    best_gt = F.argmax(iou, axis=1)

    # everything starts as "ignore"; low-IoU anchors are zeroed by the mask
    num_anchors = anchors.shape[0]
    labels = mge.ones(num_anchors).astype("int32") * cfg.ignore_label
    labels = labels * (best_iou >= cfg.rpn_negative_overlap)

    one = mge.tensor(1.0)
    if allow_low_quality_matches:
        # guarantee each gt box keeps its highest-IoU anchor
        best_anchor_of_gt = F.argmax(iou, axis=0)
        num_boxes = boxes.shapeof(0)
        gt_ids = F.linspace(0, num_boxes - 1, num_boxes).astype("int32")
        best_gt = best_gt.set_ai(gt_ids)[best_anchor_of_gt]
        best_iou = best_iou.set_ai(
            one.broadcast(num_boxes))[best_anchor_of_gt]

    # mark positives
    fg_mask = best_iou >= cfg.rpn_positive_overlap
    _, fg_inds = F.cond_take(fg_mask == 1, fg_mask)
    labels = labels.set_ai(one.broadcast(fg_inds.shapeof(0)))[fg_inds]

    # regression targets towards the matched gt box
    matched_boxes = boxes.ai[best_gt, :4]
    bbox_targets = self.box_coder.encode(anchors, matched_boxes)
    return labels, bbox_targets
def __call__(self, matrix):
    """
    Match every column of ``matrix`` to its best row and label the match.

    matrix(tensor): A two dim tensor with shape of (N, M). N is number of
    GT-boxes, while M is the number of anchors in detection.

    Returns ``(match_indices, labels)``: the arg-max row for each column, and
    a label per column that is -1 by default, ``self.labels[k]`` when the
    column's best score lies in
    ``[self.thresholds[k], self.thresholds[k + 1])``, and 1 for columns that
    hold a row-wise maximum when ``self.allow_low_quality_matches`` is set.
    """
    assert len(matrix.shape) == 2

    match_indices = F.argmax(matrix, axis=0)
    best_scores = matrix.max(axis=0)

    # default ignore label: -1
    labels = F.full_like(match_indices, -1)

    lower_bounds = self.thresholds[:-1]
    upper_bounds = self.thresholds[1:]
    for value, lower, upper in zip(self.labels, lower_bounds, upper_bounds):
        in_band = (best_scores >= lower) & (best_scores < upper)
        labels[in_band] = value

    if self.allow_low_quality_matches:
        # a column is kept if it attains the maximum of at least one row
        row_best = F.max(matrix, axis=1, keepdims=True)
        is_row_best = (matrix == row_best).sum(axis=0) > 0
        labels[is_row_best] = 1

    return match_indices, labels
def get_ground_truth(self, anchors_list, batched_gt_boxes, batched_num_gts):
    """Compute per-anchor labels, box offsets and centerness for a batch.

    For each image, every anchor point is matched to the ground-truth box of
    smallest area among the boxes that (a) contain the point (or its
    center-sampling region when ``self.cfg.center_sampling_radius > 0``) and
    (b) whose largest encoded offset lies inside the size range assigned to
    the point's level.  Points with no admissible box get label 0.

    Args:
        anchors_list: one tensor of anchor points per level.
        batched_gt_boxes: ground-truth boxes indexed ``[image, box, column]``;
            columns 0-3 are the box and column 4 the class label.
        batched_num_gts: number of valid boxes for each image.

    Returns:
        ``(labels, offsets, ctrness)`` stacked over the batch and detached.
    """
    labels_list = []
    offsets_list = []
    ctrness_list = []

    all_level_anchors = F.concat(anchors_list, axis=0)

    # The per-level size ranges depend only on the anchors and the config,
    # so build them once instead of once per image.
    object_sizes_of_interest = F.concat([
        F.broadcast_to(
            F.expand_dims(mge.tensor(size, dtype=np.float32), axis=0),
            (anchors_i.shape[0], 2))
        for anchors_i, size in zip(
            anchors_list, self.cfg.object_sizes_of_interest)
    ], axis=0)
    min_size = F.expand_dims(object_sizes_of_interest[:, 0], axis=0)
    max_size = F.expand_dims(object_sizes_of_interest[:, 1], axis=0)

    for bid in range(batched_gt_boxes.shape[0]):
        gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]

        offsets = self.point_coder.encode(
            all_level_anchors, F.expand_dims(gt_boxes[:, :4], axis=1))

        max_offsets = F.max(offsets, axis=2)
        is_cared_in_the_level = (
            (max_offsets >= min_size) & (max_offsets <= max_size))

        if self.cfg.center_sampling_radius > 0:
            gt_centers = (gt_boxes[:, :2] + gt_boxes[:, 2:4]) / 2
            is_in_boxes = []
            for stride, anchors_i in zip(self.cfg.stride, anchors_list):
                radius = stride * self.cfg.center_sampling_radius
                # center region, clipped so it never leaves the gt box
                center_boxes = F.concat([
                    F.maximum(gt_centers - radius, gt_boxes[:, :2]),
                    F.minimum(gt_centers + radius, gt_boxes[:, 2:4]),
                ], axis=1)
                center_offsets = self.point_coder.encode(
                    anchors_i, F.expand_dims(center_boxes, axis=1))
                is_in_boxes.append(F.min(center_offsets, axis=2) > 0)
            is_in_boxes = F.concat(is_in_boxes, axis=1)
        else:
            is_in_boxes = F.min(offsets, axis=2) > 0

        gt_area = (gt_boxes[:, 2] - gt_boxes[:, 0]) * (
            gt_boxes[:, 3] - gt_boxes[:, 1])
        # FIXME: use repeat instead of broadcast_to
        areas = F.broadcast_to(
            F.expand_dims(gt_area, axis=1), offsets.shape[:2])
        # inadmissible (box, point) pairs can never win the arg-min
        areas[~is_cared_in_the_level] = float("inf")
        areas[~is_in_boxes] = float("inf")

        match_indices = F.argmin(areas, axis=0)
        gt_boxes_matched = gt_boxes[match_indices]
        anchor_min_area = F.indexing_one_hot(areas, match_indices, axis=0)

        labels = gt_boxes_matched[:, 4].astype(np.int32)
        # a minimum of inf means no box was admissible for that point
        labels[anchor_min_area == float("inf")] = 0
        offsets = self.point_coder.encode(
            all_level_anchors, gt_boxes_matched[:, :4])

        left_right = offsets[:, [0, 2]]
        top_bottom = offsets[:, [1, 3]]
        ctrness = F.sqrt(
            F.maximum(
                F.min(left_right, axis=1) / F.max(left_right, axis=1), 0)
            * F.maximum(
                F.min(top_bottom, axis=1) / F.max(top_bottom, axis=1), 0))

        labels_list.append(labels)
        offsets_list.append(offsets)
        ctrness_list.append(ctrness)

    return (
        F.stack(labels_list, axis=0).detach(),
        F.stack(offsets_list, axis=0).detach(),
        F.stack(ctrness_list, axis=0).detach(),
    )
def get_ground_truth(self, anchors_list, batched_gt_boxes, batched_num_gts):
    """Compute per-anchor labels, box offsets and centerness for a batch.

    For each image and each ground-truth box, the ``self.cfg.anchor_topk``
    anchors returned by ``F.topk`` on the center distances are taken per
    level as candidates.  A candidate is foreground when its IoU reaches the
    mean plus standard deviation of that box's candidate IoUs and the anchor
    point lies inside the box.  Each anchor is matched to its highest-IoU
    foreground box; anchors without one get label 0.

    Args:
        anchors_list: one tensor of anchor points per level.
        batched_gt_boxes: ground-truth boxes indexed ``[image, box, column]``;
            columns 0-3 are the box and column 4 the class label.
        batched_num_gts: number of valid boxes for each image.

    Returns:
        ``(labels, offsets, ctrness)`` stacked over the batch and detached.
    """
    batch_labels, batch_offsets, batch_ctrness = [], [], []

    all_level_anchors = F.concat(anchors_list, axis=0)

    for bid in range(batched_gt_boxes.shape[0]):
        gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]

        level_ious = []
        level_candidates = []
        level_start = 0  # index of this level's first anchor in the concat
        for stride, anchors_i in zip(self.cfg.stride, anchors_list):
            # square box of side stride * anchor_scale around each point
            half_side = stride * self.cfg.anchor_scale / 2
            anchor_boxes = F.concat(
                [anchors_i - half_side, anchors_i + half_side], axis=1)
            level_ious.append(layers.get_iou(gt_boxes[:, :4], anchor_boxes))

            gt_centers = (gt_boxes[:, :2] + gt_boxes[:, 2:4]) / 2
            center_delta = F.expand_dims(gt_centers, axis=1) - anchors_i
            distances = F.sqrt(F.sum(center_delta**2, axis=2))
            _, topk_idxs = F.topk(distances, self.cfg.anchor_topk)
            level_candidates.append(level_start + topk_idxs)
            level_start += anchors_i.shape[0]

        ious = F.concat(level_ious, axis=1)
        candidate_idxs = F.concat(level_candidates, axis=1)

        # adaptive threshold: mean + std of each box's candidate IoUs
        candidate_ious = F.gather(ious, 1, candidate_idxs)
        iou_mean = F.mean(candidate_ious, axis=1, keepdims=True)
        iou_std = F.std(candidate_ious, axis=1, keepdims=True)
        ious_thr = iou_mean + iou_std

        is_candidate = F.scatter(
            F.zeros(ious.shape), 1, candidate_idxs,
            F.ones(candidate_idxs.shape)).astype(bool)
        is_foreground = is_candidate & (ious >= ious_thr)

        box_offsets = self.point_coder.encode(
            all_level_anchors, F.expand_dims(gt_boxes[:, :4], axis=1))
        is_in_boxes = F.min(box_offsets, axis=2) > 0

        # -1 marks (box, anchor) pairs that must never be selected
        ious[~is_foreground] = -1
        ious[~is_in_boxes] = -1

        match_indices = F.argmax(ious, axis=0)
        gt_boxes_matched = gt_boxes[match_indices]
        anchor_max_iou = F.indexing_one_hot(ious, match_indices, axis=0)

        labels = gt_boxes_matched[:, 4].astype(np.int32)
        labels[anchor_max_iou == -1] = 0
        offsets = self.point_coder.encode(
            all_level_anchors, gt_boxes_matched[:, :4])

        left_right = offsets[:, [0, 2]]
        top_bottom = offsets[:, [1, 3]]
        lr_ratio = F.min(left_right, axis=1) / F.max(left_right, axis=1)
        tb_ratio = F.min(top_bottom, axis=1) / F.max(top_bottom, axis=1)
        ctrness = F.sqrt(F.clip(lr_ratio, lower=0) * F.clip(tb_ratio, lower=0))

        batch_labels.append(labels)
        batch_offsets.append(offsets)
        batch_ctrness.append(ctrness)

    stacked_labels = F.stack(batch_labels, axis=0).detach()
    stacked_offsets = F.stack(batch_offsets, axis=0).detach()
    stacked_ctrness = F.stack(batch_ctrness, axis=0).detach()
    return (stacked_labels, stacked_offsets, stacked_ctrness)
[(64, 512, 16, 16)], True, 1000, ), ( "prelu", MF.prelu, TF.prelu, [(100, 100), (1, )], [(64, 512, 16, 16), (1, )], True, 1000, ), ( "reduce.max", lambda x: MF.max(x, 0), lambda x: torch.max(x, 0), [(100, 100)], [(64, 512, 16, 16)], True, 1000, ), ( "reduce.mean", lambda x: MF.mean(x, 0), lambda x: torch.mean(x, 0), [(100, 100)], [(64, 512, 16, 16)], True, 1000, ),
def forward(self, features, label=None, mask=None):
    """
    Compute the (supervised) contrastive loss of a batch of multi-view
    embeddings.

    If label and mask are both None, the loss degenerates to the SimCLR
    unsupervised loss.
    Reference:
        "A Simple Framework for Contrastive Learning of Visual Representations"<https://arxiv.org/pdf/2002.05709.pdf>
        "Supervised Contrastive Learning"<https://arxiv.org/abs/2004.11362>
    Args:
        features(tensor): The embedding feature. shape=[bs, n_views, ...]
        label(tensor): The label of images, shape=[bs]
        mask(tensor): contrastive mask of shape [bsz, bsz], mask_{i,j}=1 if
            sample j has the same class as sample i. Can be asymmetric.
    Returns:
        The scalar loss.
    Raises:
        ValueError: if ``features`` has fewer than 3 dimensions, if both
            ``label`` and ``mask`` are given, or if ``self.contrast_mode`` is
            neither "one" nor "all".
        RuntimeError: if the number of labels differs from the batch size.
    """
    if len(features.shape) < 3:
        raise ValueError("Features need have 3 dimensions at least")
    bs, num_view = features.shape[:2]
    # if dimension > 3, change the shape of the features to [bs, num_view, ...]
    if len(features.shape) > 3:
        features = features.reshape(bs, num_view, -1)

    # label and mask cannot provided at the same time
    if (label is not None) and (mask is not None):
        raise ValueError("label and mask cannot provided at the same time")
    elif (label is None) and (mask is None):
        mask = F.eye(bs, dtype="float32")
    elif label is not None:
        label = label.reshape(-1, 1)
        if label.shape[0] != bs:
            raise RuntimeError(
                "Num of labels does not match num of features")
        # cast like the other branches so the mask can be multiplied/summed
        mask = F.equal(label, label.T).astype("float32")
    else:
        mask = mask.astype("float32")

    contrast_count = features.shape[1]
    # Keep `features` as a tensor: it is indexed below in "one" mode, which
    # failed when the name was rebound to the list returned by F.split.
    views = F.split(features, contrast_count, axis=1)
    # rows are ordered view-major: all samples of view 0, then view 1, ...
    contrast_feature = F.squeeze(F.concat(views, axis=0), axis=1)
    if self.contrast_mode == "one":
        anchor_feature = features[:, 0]
        anchor_count = 1
    elif self.contrast_mode == "all":
        anchor_feature = contrast_feature
        anchor_count = contrast_count
    else:
        raise ValueError("Unknown mode:{}".format(self.contrast_mode))

    # compute logits
    anchor_dot_contrast = F.div(
        F.matmul(anchor_feature, contrast_feature.T), self.temperate)
    # for numerical stability
    logits_max = F.max(anchor_dot_contrast, axis=-1, keepdims=True)
    logits = anchor_dot_contrast - logits_max

    # tile mask to [bs * anchor_count, bs * contrast_count] so that entry
    # (v * bs + i, w * bs + j) equals mask[i, j].  Stacking copies and
    # reshaping does not produce this layout when contrast_count > 1.
    mask = F.concat(
        [F.concat([mask] * contrast_count, axis=1)] * anchor_count, axis=0)
    # mask-out self-contrast cases (zero the diagonal)
    logits_mask = F.scatter(
        F.ones_like(mask), 1,
        F.arange(0, int(bs * anchor_count), dtype="int32").reshape(-1, 1),
        F.zeros(int(bs * anchor_count), dtype="int32").reshape(-1, 1))
    mask = mask * logits_mask

    # compute log_prob
    exp_logits = F.exp(logits) * logits_mask
    log_prob = logits - F.log(F.sum(exp_logits, axis=1,
                                    keepdims=True))  # equation 2

    # mean of log-likelihood over the positives of each anchor
    mean_log_prob_pos = F.sum(mask * log_prob, axis=1) / F.sum(mask, axis=1)

    # loss
    loss = -(self.temperate / self.base_temperate) * mean_log_prob_pos
    loss = F.mean(loss.reshape(anchor_count, bs))
    return loss