def _processBinayMasks(self, ann):
    boxes = []
    masks = []
    labels = []

    def mask_to_tight_box(mask):
        a = mask.nonzero()
        bbox = [
            jt.min(a[:, 1]),
            jt.min(a[:, 0]),
            jt.max(a[:, 1]),
            jt.max(a[:, 0]),
        ]
        bbox = list(map(int, bbox))
        return bbox  # xmin, ymin, xmax, ymax

    # Sort for a consistent instance order, matching the polygon annotation
    _, instIds = jt.argsort(jt.unique(ann))
    for instId in instIds.numpy():
        if instId < 1000:  # group labels
            continue

        mask = ann == instId
        label = int(instId / 1000)
        label = self.cityscapesID_to_ind[label]
        box = mask_to_tight_box(mask)

        boxes.append(box)
        masks.append(mask)
        labels.append(label)

    return boxes, masks, labels
def check_cub_argsort(shape, dim, descending=False):
    with jt.log_capture_scope(
        log_silent=1,
        log_v=0, log_vprefix="op.cc=100"
    ) as raw_log:
        x = jt.random(shape)
        y, y_key = jt.argsort(x, dim=dim, descending=descending)
        v = []
        for i in range(len(shape)):
            if i == dim:
                v.append(y)
            else:
                v.append(jt.index(shape, dim=i))
        yk = jt.reindex(x, v)
        yk_ = yk.data
        y_key_ = y_key.data
    logs = find_log_with_re(raw_log,
        "(Jit op key (not )?found: " + "cub_argsort" + ".*)")
    assert len(logs) == 1
    x__ = x.data
    if descending:
        x__ = -x__
    yk__ = np.sort(x__, axis=dim)
    if descending:
        yk__ = -yk__
    assert np.allclose(y_key_, yk__)
    assert np.allclose(yk_, yk__)
def select_top_predictions(self, predictions):
    """
    Select only predictions which have a `score` > self.confidence_threshold,
    and return the predictions in descending order of score.

    Arguments:
        predictions (BoxList): the result of the computation by the model.
            It should contain the field `scores`.

    Returns:
        prediction (BoxList): the detected objects. Additional information
            of the detection properties can be found in the fields of
            the BoxList via `prediction.fields()`
    """
    if predictions.has_field("mask_scores"):
        scores = predictions.get_field("mask_scores")
    else:
        scores = predictions.get_field("scores")
    if scores.shape[0] == 0:
        return None
    keep = jt.nonzero(scores > self.confidence_threshold).squeeze(1)
    predictions = predictions[keep]
    scores = predictions.get_field("scores")
    idx, _ = jt.argsort(scores, 0, descending=True)
    return predictions[idx]
def fast_nms(self, boxes, masks, scores, iou_threshold: float = 0.5,
             top_k: int = 200, second_threshold: bool = False):
    idx, scores = scores.argsort(1, descending=True)
    idx = idx[:, :top_k]
    scores = scores[:, :top_k]

    num_classes, num_dets = idx.shape

    boxes = boxes[idx.view(-1)].view(num_classes, num_dets, 4)
    masks = masks[idx.view(-1)].view(num_classes, num_dets, -1)

    iou = jaccard(boxes, boxes)
    iou = iou.triu_(diagonal=1)
    iou_max = iou.max(dim=1)

    # Now just filter out the ones higher than the threshold
    keep = (iou_max <= iou_threshold)

    # We should also only keep detections over the confidence threshold, but at the cost of
    # maxing out your detection count for every image, you can just not do that. Because we
    # have such a minimal amount of computation per detection (matrix multiplication only),
    # this increase doesn't affect us much (+0.2 mAP for 34 -> 33 fps), so we leave it out.
    # However, when you implement this in your method, you should do this second threshold.
    if second_threshold:
        keep *= (scores > self.conf_thresh)

    # Assign each kept detection to its corresponding class
    classes = jt.arange(num_classes).unsqueeze(1).expand_as(keep)
    classes = classes[keep]

    boxes = boxes[keep]
    masks = masks[keep]
    scores = scores[keep]

    # Only keep the top cfg.max_num_detections highest scores across all classes
    idx, scores = jt.argsort(scores, dim=0, descending=True)
    idx = idx[:cfg.max_num_detections]
    scores = scores[:cfg.max_num_detections]

    classes = classes[idx]
    boxes = boxes[idx]
    masks = masks[idx]

    return boxes, masks, classes, scores
def topk(input, k, dim=None, largest=True, sorted=True):
    if input.numel() == 0:
        return jt.array([], dtype=input.dtype), jt.array([], dtype='int32')
    if dim is None:
        dim = -1
    if dim < 0:
        dim += input.ndim

    index, values = jt.argsort(input, dim=dim, descending=largest)
    dims = (slice(None),) * dim + (slice(0, k),)
    indices = index[dims]
    values = values[dims]
    return values, indices
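# A minimal usage sketch for the topk above (not from the source; assumes
# jittor is installed and topk is in scope):
import jittor as jt
x = jt.array([[3.0, 1.0, 2.0]])
values, indices = topk(x, k=2, dim=1)  # top-2 along the last dim
# values.data -> [[3., 2.]], indices.data -> [[0, 2]]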
def kthvalue(input, k, dim=None, keepdim=False):
    if dim is None:
        dim = -1
    if dim < 0:
        dim += input.ndim
    index, values = jt.argsort(input, dim=dim)
    dims = (slice(None),) * dim + (slice(k - 1, k),)
    indices = index[dims]
    values = values[dims]
    if not keepdim and indices.ndim > 1:
        indices = indices.squeeze(dim)
        values = values.squeeze(dim)
    return values, indices
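# A minimal usage sketch for the kthvalue above (not from the source; assumes
# jittor as jt and kthvalue in scope). k is 1-indexed: the slice(k-1, k) picks
# the k-th smallest element along dim.
import jittor as jt
x = jt.array([[1.0, 5.0, 3.0]])
value, index = kthvalue(x, k=2, dim=1)
# value.data -> [3.], index.data -> [2]  (3.0 is the 2nd smallest)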
def unique(x):
    r'''
    Returns the unique elements of the input tensor.

    Args:
        x: the input tensor.
    '''
    x = x.reshape(-1)
    _, x = jt.argsort(x)
    index, = jt.index((x.shape[0],))
    y = x[1:][x[index[1:]] != x[index[:-1]]]
    x = jt.contrib.concat([x[:1], y], dim=0)
    return x
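# A minimal usage sketch for the unique above (not from the source; assumes
# jittor as jt and unique in scope). The sort puts duplicates next to each
# other, so comparing neighbors drops them; the result comes back sorted.
import jittor as jt
y = unique(jt.array([3, 1, 2, 1, 3]))
# y.data -> [1, 2, 3]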
def topk(input, k, dim=None, largest=True, sorted=True):
    if dim is None:
        dim = -1
    if dim < 0:
        dim += input.ndim

    # Swap the target dim to the front, sort along dim 0, then swap back
    transpose_dims = [i for i in range(input.ndim)]
    transpose_dims[0] = dim
    transpose_dims[dim] = 0
    input = input.transpose(transpose_dims)
    index, values = jt.argsort(input, dim=0, descending=largest)
    indices = index[:k]
    values = values[:k]
    indices = indices.transpose(transpose_dims)
    values = values.transpose(transpose_dims)
    return [values, indices]
def unique(x):
    r'''
    Returns the unique elements of the input tensor.

    Args:
        x: the input tensor.
    '''
    x = x.reshape(-1)
    _, x = jt.argsort(x)
    index2 = [i for i in range(1, x.shape[0])]
    index1 = [i for i in range(x.shape[0] - 1)]
    y = x[1:][x[index2] != x[index1]]
    x = jt.contrib.concat([x[:1], y], dim=0)
    return x
def nms(dets, thresh):
    '''
    dets: jt.array of shape [N, 5] with rows [x1, y1, x2, y2, score]
    x(:,0)->x1, x(:,1)->y1, x(:,2)->x2, x(:,3)->y2, x(:,4)->score
    '''
    threshold = str(thresh)
    order = jt.argsort(dets[:, 4], descending=True)[0]
    dets = dets[order]
    # Build the pairwise IoU expression for jt.candidate: a box j fails
    # (is suppressed) when its IoU with an already-selected box i exceeds
    # the threshold.
    s_1 = '(@x(j,2)-@x(j,0)+1)*(@x(j,3)-@x(j,1)+1)'
    s_2 = '(@x(i,2)-@x(i,0)+1)*(@x(i,3)-@x(i,1)+1)'
    s_inter_w = 'max((Tx)0,min(@x(j,2),@x(i,2))-max(@x(j,0),@x(i,0))+1)'
    s_inter_h = 'max((Tx)0,min(@x(j,3),@x(i,3))-max(@x(j,1),@x(i,1))+1)'
    s_inter = s_inter_h + '*' + s_inter_w
    iou = s_inter + '/(' + s_1 + '+' + s_2 + '-' + s_inter + ')'
    fail_cond = iou + '>' + threshold
    selected = jt.candidate(dets, fail_cond)
    return order[selected]
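# A minimal usage sketch for the nms above (not from the source; assumes
# jittor as jt and nms in scope; the box coordinates are made up):
import jittor as jt
dets = jt.array([[0., 0., 10., 10., 0.9],     # kept (highest score)
                 [1., 1., 10., 10., 0.8],     # suppressed: IoU with box 0 > 0.5
                 [20., 20., 30., 30., 0.7]])  # kept (no overlap)
keep = nms(dets, 0.5)  # indices of the surviving boxes, here [0, 2]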
def lovasz_hinge_flat(self, logits, labels):
    """
    Binary Lovasz hinge loss
      logits: [P] Variable, logits at each prediction (between -\infty and +\infty)
      labels: [P] Tensor, binary ground truth labels (0 or 1)
      ignore: label to ignore
    """
    if len(labels) == 0:
        # only void pixels, the gradients should be 0
        return logits.sum() * 0.
    signs = 2. * labels.float() - 1.
    errors = (1. - logits * jt.array(signs))
    perm, errors_sorted = jt.argsort(errors, dim=0, descending=True)
    perm = perm.data
    gt_sorted = labels[perm]
    grad = lovasz_grad(gt_sorted)
    loss = jt.dot(nn.relu(errors_sorted), jt.array(grad))
    return loss
def execute(self, xyz1, xyz2, points1, points2):
    """
    Input:
        xyz1: input points position data, [B, C, N]
        xyz2: sampled input points position data, [B, C, S]
        points1: input points data, [B, D, N]
        points2: input points data, [B, D, S]
    Return:
        new_points: upsampled points data, [B, D', N]
    """
    B, N, C = xyz1.shape
    _, S, _ = xyz2.shape

    if S == 1:
        interpolated_points = points2.repeat(1, N, 1)
    else:
        # Inverse-distance-weighted interpolation from the 3 nearest neighbors
        dists = square_distance(xyz1, xyz2)
        idx, dists = jt.argsort(dists, dim=-1)
        dists, idx = dists[:, :, :3], idx[:, :, :3]  # [B, N, 3]

        dist_recip = 1.0 / (dists + 1e-8)
        norm = jt.sum(dist_recip, dim=2, keepdims=True)
        weight = dist_recip / norm
        interpolated_points = jt.sum(
            index_points(points2, idx) * weight.view(B, N, 3, 1), dim=2)

    if points1 is not None:
        new_points = concat([points1, interpolated_points], dim=-1)
    else:
        new_points = interpolated_points

    new_points = new_points.permute(0, 2, 1)
    for i, conv in self.mlp_convs.layers.items():
        bn = self.mlp_bns[i]
        new_points = self.relu(bn(conv(new_points)))
    return new_points.permute(0, 2, 1)
def check_argsort(shape, dim, descending=False):
    x = jt.random(shape)
    y, y_key = jt.argsort(x, dim=dim, descending=descending)
    v = []
    for i in range(len(shape)):
        if i == dim:
            v.append(y)
        else:
            v.append(jt.index(shape, dim=i))
    yk = jt.reindex(x, v)
    yk_ = yk.data
    y_key_ = y_key.data
    x__ = x.data
    if descending:
        x__ = -x__
    yk__ = np.sort(x__, axis=dim)
    if descending:
        yk__ = -yk__
    assert np.allclose(y_key_, yk__)
    assert np.allclose(yk_, yk__)
def randperm(n, dtype="int32"):
    # Argsort of n random keys yields a uniformly random permutation of 0..n-1
    key = jt.random((n,))
    index, _ = jt.argsort(key)
    return index.cast(dtype)
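# A minimal usage sketch for the randperm above (not from the source; assumes
# jittor as jt and randperm in scope):
import jittor as jt
perm = randperm(5)
# perm.data -> some shuffle of [0, 1, 2, 3, 4], dtype int32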
def run_model(config_file, img_f=None):
    original_image = load(img_f)

    from detectron.config import cfg
    from detectron.modeling.detector import build_detection_model
    from detectron.utils.checkpoint import DetectronCheckpointer
    from detectron.structures.image_list import to_image_list
    from detectron.modeling.roi_heads.mask_head.inference import Masker
    from jittor import transform as T
    from jittor import nn
    import jittor as jt
    from jittor_utils import auto_diff

    jt.flags.use_cuda = 1
    confidence_threshold = 0.0

    cfg.merge_from_file(config_file)
    model = build_detection_model(cfg)
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=cfg.OUTPUT_DIR)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    name = config_file.split('/')[-1].split('.')[0]
    # hook = auto_diff.Hook(name)
    # hook.hook_module(model)
    model.eval()

    class Resize(object):
        def __init__(self, min_size, max_size):
            self.min_size = min_size
            self.max_size = max_size

        # modified from torchvision to add support for max size
        def get_size(self, image_size):
            w, h = image_size
            size = self.min_size
            max_size = self.max_size
            if max_size is not None:
                min_original_size = float(min((w, h)))
                max_original_size = float(max((w, h)))
                if max_original_size / min_original_size * size > max_size:
                    size = int(
                        round(max_size * min_original_size / max_original_size))

            if (w <= h and w == size) or (h <= w and h == size):
                return (h, w)

            if w < h:
                ow = size
                oh = int(size * h / w)
            else:
                oh = size
                ow = int(size * w / h)

            return (oh, ow)

        def __call__(self, image):
            size = self.get_size(image.size)
            image = T.resize(image, size)
            return image

    def build_transform():
        if cfg.INPUT.TO_BGR255:
            to_bgr_transform = T.Lambda(lambda x: x * 255)
        else:
            to_bgr_transform = T.Lambda(lambda x: x[[2, 1, 0]])

        normalize_transform = T.ImageNormalize(mean=cfg.INPUT.PIXEL_MEAN,
                                               std=cfg.INPUT.PIXEL_STD)
        min_size = cfg.INPUT.MIN_SIZE_TEST
        max_size = cfg.INPUT.MAX_SIZE_TEST
        transform = T.Compose([
            T.ToPILImage(),
            Resize(min_size, max_size),
            T.ToTensor(),
            to_bgr_transform,
            normalize_transform,
        ])
        return transform

    transforms = build_transform()
    image = transforms(original_image)
    image_list = to_image_list(image, cfg.DATALOADER.SIZE_DIVISIBILITY)
    predictions = model(image_list)
    predictions = predictions[0]

    if predictions.has_field("mask_scores"):
        scores = predictions.get_field("mask_scores")
    else:
        scores = predictions.get_field("scores")
    keep = jt.nonzero(scores > confidence_threshold).squeeze(1)
    predictions = predictions[keep]
    scores = predictions.get_field("scores")
    idx, _ = jt.argsort(scores, 0, descending=True)
    predictions = predictions[idx]
    result_diff(predictions)
def check_backward(shape, dim, descending=False):
    x = jt.random(shape)
    y, y_key = jt.argsort(x, dim=dim, descending=descending)
    loss = (y_key * y_key).sum()
    gs = jt.grad(loss, x)
    assert np.allclose(x.data * 2, gs.data)
def execute(self, loc, score, anchor, img_size, scale=1.):
    """input should be ndarray

    Propose RoIs.

    Inputs :obj:`loc, score, anchor` refer to the same anchor when indexed
    by the same index.

    On notations, :math:`R` is the total number of anchors. This is equal
    to the product of the height and the width of an image and the number
    of anchor bases per pixel.

    Type of the output is same as the inputs.

    Args:
        loc (array): Predicted offsets and scaling to anchors.
            Its shape is :math:`(R, 4)`.
        score (array): Predicted foreground probability for anchors.
            Its shape is :math:`(R,)`.
        anchor (array): Coordinates of anchors. Its shape is :math:`(R, 4)`.
        img_size (tuple of ints): A tuple :obj:`height, width`,
            which contains image size after scaling.
        scale (float): The scaling factor used to scale an image after
            reading it from a file.

    Returns:
        array: An array of coordinates of proposal boxes.
        Its shape is :math:`(S, 4)`. :math:`S` is less than
        :obj:`self.n_test_post_nms` in test time and less than
        :obj:`self.n_train_post_nms` in train time. :math:`S` depends on
        the size of the predicted bounding boxes and the number of
        bounding boxes discarded by NMS.
    """
    # NOTE: at test time, remember to put the model in eval mode so that
    # the test-time pre/post-NMS limits below are used.
    if self.is_training():
        n_pre_nms = self.n_train_pre_nms
        n_post_nms = self.n_train_post_nms
    else:
        n_pre_nms = self.n_test_pre_nms
        n_post_nms = self.n_test_post_nms

    # Convert anchors into proposals via bbox transformations.
    roi = loc2bbox(anchor, loc)

    # Clip predicted boxes to image.
    roi[:, 0] = jt.clamp(roi[:, 0], min_v=0, max_v=img_size[0])
    roi[:, 2] = jt.clamp(roi[:, 2], min_v=0, max_v=img_size[0])
    roi[:, 1] = jt.clamp(roi[:, 1], min_v=0, max_v=img_size[1])
    roi[:, 3] = jt.clamp(roi[:, 3], min_v=0, max_v=img_size[1])

    # Remove predicted boxes with either height or width < threshold.
    min_size = self.min_size * scale
    hs = roi[:, 2] - roi[:, 0]
    ws = roi[:, 3] - roi[:, 1]
    keep = jt.where((hs >= min_size) & (ws >= min_size))[0]
    roi = roi[keep, :]
    score = score[keep]

    # Sort all (proposal, score) pairs by score from highest to lowest.
    # Take top pre_nms_topN (e.g. 6000).
    order, _ = jt.argsort(score, descending=True)
    if n_pre_nms > 0:
        order = order[:n_pre_nms]
    roi = roi[order, :]
    score = score[order]

    # Apply nms (e.g. threshold = 0.7).
    # Take after_nms_topN (e.g. 300).
    dets = jt.contrib.concat([roi, score.unsqueeze(1)], dim=1)
    keep = jt.nms(dets, self.nms_thresh)
    if n_post_nms > 0:
        keep = keep[:n_post_nms]
    roi = roi[keep]
    return roi
def execute(self, xyz1, xyz2, points1, points2):
    """
    Input:
        xyz1: input points position data, [B, C, N]
        xyz2: sampled input points position data, [B, C, S]
        points1: input points data, [B, D, N]
        points2: input points data, [B, D, S]
    Return:
        new_points: upsampled points data, [B, D', N]
    """
    xyz1 = xyz1.permute(0, 2, 1)
    xyz2 = xyz2.permute(0, 2, 1)
    points1 = points1.permute(0, 2, 1)
    points2 = points2.permute(0, 2, 1)

    B, N, C = xyz1.shape
    _, S, _ = xyz2.shape

    # Inverse-distance-weighted interpolation from the 3 nearest neighbors
    dists = square_distance(xyz1, xyz2)
    idx, dists = jt.argsort(dists, dim=-1)
    dists, idx = dists[:, :, :3], idx[:, :, :3]  # [B, N, 3]

    dist_recip = 1.0 / (dists + 1e-8)
    norm = jt.sum(dist_recip, dim=2, keepdims=True)
    weight = dist_recip / norm
    interpolated_points = jt.sum(
        index_points(points2, idx) * weight.view(B, N, 3, 1), dim=2)

    xyz_density = compute_density(xyz1, self.bandwidth)
    density_scale = self.densitynet(xyz_density)

    new_xyz, new_points, grouped_xyz_norm, _, grouped_density = sample_and_group(
        N, self.nsample, xyz1, interpolated_points,
        density_scale.reshape(B, N, 1))

    new_points = new_points.permute(0, 3, 2, 1)  # [B, C+D, nsample, npoint]
    for i in range(len(self.mlp_convs)):
        conv = self.mlp_convs[i]
        bn = self.mlp_bns[i]
        new_points = self.relu(bn(conv(new_points)))

    grouped_xyz = grouped_xyz_norm.permute(0, 3, 2, 1)
    weights = self.weightnet(grouped_xyz)
    new_points = new_points * grouped_density.permute(0, 3, 2, 1)
    new_points = jt.matmul(new_points.permute(0, 3, 1, 2),
                           weights.permute(0, 3, 2, 1)).reshape(B, N, -1)
    new_points = self.linear(new_points)
    new_points = self.bn_linear(new_points.permute(0, 2, 1))
    new_points = self.relu(new_points)
    new_xyz = new_xyz.permute(0, 2, 1)
    return new_points
def render_rays(ray_batch,
                network_fn,
                network_query_fn,
                N_samples,
                retraw=False,
                lindisp=False,
                perturb=0.,
                N_importance=0,
                network_fine=None,
                white_bkgd=False,
                raw_noise_std=0.,
                verbose=False):
    """Volumetric rendering.

    Args:
      ray_batch: array of shape [batch_size, ...]. All information necessary
        for sampling along a ray, including: ray origin, ray direction, min
        dist, max dist, and unit-magnitude viewing direction.
      network_fn: function. Model for predicting RGB and density at each point
        in space.
      network_query_fn: function used for passing queries to network_fn.
      N_samples: int. Number of different times to sample along each ray.
      retraw: bool. If True, include model's raw, unprocessed predictions.
      lindisp: bool. If True, sample linearly in inverse depth rather than in depth.
      perturb: float, 0 or 1. If non-zero, each ray is sampled at stratified
        random points in time.
      N_importance: int. Number of additional times to sample along each ray.
        These samples are only passed to network_fine.
      network_fine: "fine" network with same spec as network_fn.
      white_bkgd: bool. If True, assume a white background.
      raw_noise_std: ...
      verbose: bool. If True, print more debugging info.

    Returns:
      rgb_map: [num_rays, 3]. Estimated RGB color of a ray. Comes from fine model.
      disp_map: [num_rays]. Disparity map. 1 / depth.
      acc_map: [num_rays]. Accumulated opacity along each ray. Comes from fine model.
      raw: [num_rays, num_samples, 4]. Raw predictions from model.
      rgb0: See rgb_map. Output for coarse model.
      disp0: See disp_map. Output for coarse model.
      acc0: See acc_map. Output for coarse model.
      z_std: [num_rays]. Standard deviation of distances along ray for each sample.
    """
    N_rays = ray_batch.shape[0]
    rays_o, rays_d = ray_batch[:, 0:3], ray_batch[:, 3:6]  # [N_rays, 3] each
    viewdirs = ray_batch[:, -3:] if ray_batch.shape[-1] > 8 else None
    bounds = jt.reshape(ray_batch[..., 6:8], [-1, 1, 2])
    near, far = bounds[..., 0], bounds[..., 1]  # [-1, 1]

    # Coarse pass: sample along each ray and query the coarse network.
    z_vals = sample(N_rays, N_samples, lindisp, perturb, near, far)
    pts = rays_o.unsqueeze(-2) + rays_d.unsqueeze(-2) * z_vals.unsqueeze(
        -1)  # [N_rays, N_samples, 3]
    raw = network_query_fn(pts, viewdirs, network_fn)
    rgb_map, disp_map, acc_map, weights, depth_map = integrator(
        raw, z_vals, rays_d, raw_noise_std, white_bkgd)

    rgb_map_0, disp_map_0, acc_map_0 = rgb_map, disp_map, acc_map

    if N_importance > 0:
        # Importance sampling: draw extra samples where the coarse weights are
        # large, merge them with the coarse samples (argsort keeps the depths
        # ordered along each ray), and query the fine network.
        z_vals_mid = .5 * (z_vals[..., 1:] + z_vals[..., :-1])
        z_samples = sample_pdf(z_vals_mid, weights[..., 1:-1], N_importance,
                               det=(perturb == 0.))
        z_samples = z_samples.detach()

        _, z_vals = jt.argsort(jt.concat([z_vals, z_samples], -1), -1)
        pts = rays_o.unsqueeze(-2) + rays_d.unsqueeze(-2) * z_vals.unsqueeze(
            -1)  # [N_rays, N_samples + N_importance, 3]

        run_fn = network_fn if network_fine is None else network_fine
        raw = network_query_fn(pts, viewdirs, run_fn)
        rgb_map, disp_map, acc_map, weights, depth_map = integrator(
            raw, z_vals, rays_d, raw_noise_std, white_bkgd)

    ret = {'rgb_map': rgb_map, 'disp_map': disp_map, 'acc_map': acc_map}
    if retraw:
        ret['raw'] = raw
    if N_importance > 0:
        ret['rgb0'] = rgb_map_0
        ret['disp0'] = disp_map_0
        ret['acc0'] = acc_map_0

    return ret