def fuse_conv_and_bn(conv, bn):
    # Fuse convolution and batchnorm layers
    # https://tehnokv.com/posts/fusing-batchnorm-and-conv/
    fusedconv = nn.Conv2d(conv.in_channels,
                          conv.out_channels,
                          kernel_size=conv.kernel_size,
                          stride=conv.stride,
                          padding=conv.padding,
                          groups=conv.groups,
                          bias=True)

    # prepare filters
    w_conv = conv.weight.clone().view(conv.out_channels, -1)
    w_bn = jt.diag(bn.weight / (jt.sqrt(bn.eps + bn.running_var)))
    fusedconv.weight.assign(
        jt.matmul(w_bn, w_conv).view(fusedconv.weight.shape))

    # prepare spatial bias
    b_conv = jt.zeros(
        (conv.weight.shape[0], )) if conv.bias is None else conv.bias
    b_bn = bn.bias - bn.weight * bn.running_mean / jt.sqrt(bn.running_var +
                                                           bn.eps)
    fusedconv.bias.assign(
        jt.matmul(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)

    return fusedconv
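# Usage sketch (added for illustration, not from the original source): assuming
# jittor's nn.Conv2d / nn.BatchNorm2d, the fused layer should reproduce conv
# followed by eval-mode batchnorm up to float tolerance. With a freshly
# constructed bn this is a weak (near-identity) but quick sanity check.
def _check_fuse_conv_and_bn():
    import numpy as np
    import jittor as jt
    from jittor import nn
    conv = nn.Conv2d(3, 8, kernel_size=3, padding=1)
    bn = nn.BatchNorm2d(8)
    bn.eval()  # fusion folds running_mean/running_var, so bn must be in eval mode
    x = jt.random((2, 3, 16, 16))
    fused = fuse_conv_and_bn(conv, bn)
    assert np.allclose(bn(conv(x)).data, fused(x).data, atol=1e-4)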
def test_var_holder(self):
    jt.clean()
    expect_error(lambda: jt.matmul(1, 1))
    expect_error(lambda: jt.matmul([1], [1]))
    expect_error(lambda: jt.matmul([[1]], [1]))
    self.assertEqual(jt.number_of_lived_vars(), 0)
    a = jt.matmul(jt.float32([[3]]), jt.float32([[4]])).data
    assert a.shape == (1, 1) and a[0, 0] == 12
    a = np.array([[1, 0], [0, 1]]).astype("float32")
    b = np.array([[4, 1], [2, 2]]).astype("float32")
    c = np.matmul(a, b)
    jtc = jt.matmul(jt.array(a), jt.array(b)).data
    assert np.all(jtc == c)
def spmm(sparse_x, y):
    assert isinstance(sparse_x, SparseVar) and isinstance(y, jt.Var)
    assert sparse_x.ndim == 2 and y.ndim == 2 and sparse_x.shape[-1] == y.shape[0]

    # TODO: use a true sparse kernel instead of densifying first.
    x = sparse_x.to_dense()
    return jt.matmul(x, y)
def pullaway_loss(embeddings):
    norm = jt.sqrt((embeddings ** 2).sum(1, keepdims=True))
    normalized_emb = embeddings / norm
    similarity = jt.matmul(normalized_emb, normalized_emb.transpose(1, 0))
    batch_size = embeddings.size(0)
    loss_pt = (jt.sum(similarity) - batch_size) / (batch_size * (batch_size - 1))
    return loss_pt
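# Usage sketch (added for illustration, not from the original source):
# pullaway_loss averages the off-diagonal cosine similarities, so identical
# embeddings give a loss of 1 and mutually orthogonal embeddings give 0.
def _check_pullaway_loss():
    import jittor as jt
    same = jt.float32([[1.0, 0.0], [1.0, 0.0]])    # identical rows
    ortho = jt.float32([[1.0, 0.0], [0.0, 1.0]])   # orthogonal rows
    assert abs(pullaway_loss(same).item() - 1.0) < 1e-5
    assert abs(pullaway_loss(ortho).item()) < 1e-5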
def linear(x, n):
    w = jt.make_var([n, x.shape[-1]],
                    init=lambda *a: init.invariant_uniform(*a))
    w = w.reindex([w.shape[1], w.shape[0]], ["i1", "i0"])
    bound = 1.0 / math.sqrt(w.shape[0])
    b = jt.make_var([n], init=lambda *a: init.uniform(*a, -bound, bound))
    return jt.matmul(x, w) + b
def get_rotation_matrix(tx, ty, tz):
    m_x = jt.zeros((tx.shape[0], 3, 3))
    m_y = jt.zeros((tx.shape[0], 3, 3))
    m_z = jt.zeros((tx.shape[0], 3, 3))

    m_x[:, 1, 1], m_x[:, 1, 2] = tx.cos(), -tx.sin()
    m_x[:, 2, 1], m_x[:, 2, 2] = tx.sin(), tx.cos()
    m_x[:, 0, 0] = 1

    m_y[:, 0, 0], m_y[:, 0, 2] = ty.cos(), ty.sin()
    m_y[:, 2, 0], m_y[:, 2, 2] = -ty.sin(), ty.cos()
    m_y[:, 1, 1] = 1

    m_z[:, 0, 0], m_z[:, 0, 1] = tz.cos(), -tz.sin()
    m_z[:, 1, 0], m_z[:, 1, 1] = tz.sin(), tz.cos()
    m_z[:, 2, 2] = 1

    return jt.matmul(m_z, jt.matmul(m_y, m_x))
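# Usage sketch (added for illustration, not from the original source): the
# composition R = Rz @ Ry @ Rx should be a proper rotation, so R @ R^T ~ I
# for any batch of angles.
def _check_get_rotation_matrix():
    import numpy as np
    import jittor as jt
    tx, ty, tz = jt.random((4,)), jt.random((4,)), jt.random((4,))
    r = get_rotation_matrix(tx, ty, tz)  # [4, 3, 3]
    rrt = jt.matmul(r, r.transpose(0, 2, 1)).data
    assert np.allclose(rrt, np.tile(np.eye(3), (4, 1, 1)), atol=1e-5)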
def projection(vertices, K, R, t, dist_coeffs, orig_size, eps=1e-9):
    '''
    Calculate projective transformation of vertices given a projection matrix
    Input parameters:
    K: batch_size * 3 * 3 intrinsic camera matrix
    R, t: batch_size * 3 * 3, batch_size * 1 * 3 extrinsic calibration parameters
    dist_coeffs: vector of distortion coefficients
    orig_size: original size of image captured by the camera
    Returns: For each point [X,Y,Z] in world coordinates [u,v,z] where u,v are
    the coordinates of the projection in pixels and z is the depth
    '''
    # instead of P*x we compute x'*P'
    vertices = jt.matmul(vertices, R.transpose((0, 2, 1))[0]) + t
    x, y, z = vertices[:, :, 0], vertices[:, :, 1], vertices[:, :, 2]
    x_ = x / (z + eps)
    y_ = y / (z + eps)

    # Get distortion coefficients from vector
    k1 = dist_coeffs[:, 0].unsqueeze(1)
    k2 = dist_coeffs[:, 1].unsqueeze(1)
    p1 = dist_coeffs[:, 2].unsqueeze(1)
    p2 = dist_coeffs[:, 3].unsqueeze(1)
    k3 = dist_coeffs[:, 4].unsqueeze(1)

    # we use x_ for x' and x__ for x'' etc.
    x_2 = x_.sqr()
    y_2 = y_.sqr()
    r = jt.sqrt(x_2 + y_2)
    r2 = r.sqr()
    r4 = r2.sqr()
    r6 = r4 * r2

    tmp = k1 * r2 + k2 * r4 + k3 * r6 + 1
    x__ = x_ * tmp + 2 * p1 * x_ * y_ + p2 * (r2 + 2 * x_2)
    y__ = y_ * tmp + p1 * (r2 + 2 * y_2) + 2 * p2 * x_ * y_

    vertices = jt.stack([x__, y__, jt.ones(z.shape)], dim=-1)
    vertices = jt.matmul(vertices, K.transpose((0, 2, 1))[0])
    u, v = vertices[:, :, 0], vertices[:, :, 1]
    v = orig_size - v

    # map u,v from [0, img_size] to [-1, 1] to use by the renderer
    u = 2 * (u - orig_size / 2.) / orig_size
    v = 2 * (v - orig_size / 2.) / orig_size
    vertices = jt.stack([u, v, z], dim=-1)
    return vertices
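# Usage sketch (added for illustration, not from the original source): with
# identity intrinsics/extrinsics and zero distortion, projection reduces to
# perspective division plus the [0, orig_size] -> [-1, 1] remapping, and the
# depth channel of the output equals the input z.
def _check_projection():
    import numpy as np
    import jittor as jt
    eye3 = jt.array(np.eye(3, dtype="float32")).unsqueeze(0)  # [1, 3, 3]
    t = jt.zeros((1, 1, 3))
    dist = jt.zeros((1, 5))
    verts = jt.random((1, 10, 3)) + 1.0  # keep z positive
    out = projection(verts, eye3, eye3, t, dist, orig_size=256)
    assert out.shape == verts.shape
    assert np.allclose(out.data[..., 2], verts.data[..., 2], atol=1e-5)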
def execute(self, x, edge_index, edge_weight: OptVar = None,
            batch: OptVar = None, lambda_max: OptVar = None):
    if self.normalization != 'sym' and lambda_max is None:
        raise ValueError('You need to pass `lambda_max` to `execute()` in '
                         'case the normalization is non-symmetric.')

    if lambda_max is None:
        lambda_max = Var([2.0])
    if not isinstance(lambda_max, Var):
        lambda_max = Var([lambda_max])
    assert lambda_max is not None

    edge_index, norm = self.__norm__(edge_index, x.size(self.node_dim),
                                     edge_weight, self.normalization,
                                     lambda_max, dtype=x.dtype, batch=batch)

    Tx_0 = x
    out = jt.matmul(Tx_0, self.weight[0])

    if self.weight.size(0) > 1:
        Tx_1 = self.propagate(edge_index, x=x, norm=norm, size=None)
        out = out + jt.matmul(Tx_1, self.weight[1])

    # Chebyshev recurrence: T_k(x) = 2 * L * T_{k-1}(x) - T_{k-2}(x)
    for k in range(2, self.weight.size(0)):
        Tx_2 = self.propagate(edge_index, x=Tx_1, norm=norm, size=None)
        Tx_2 = 2. * Tx_2 - Tx_0
        out = out + jt.matmul(Tx_2, self.weight[k])
        Tx_0, Tx_1 = Tx_1, Tx_2

    if self.bias is not None:
        out += self.bias
    return out
def execute(self, x):
    batch_size = x.size(0)
    x = jt.matmul(self.laplacian, x)
    dims = tuple(range(len(x.shape))[1:])
    x = x.pow(2).sum(dims)
    if self.average:
        return x.sum() / batch_size
    else:
        return x
def test_matmul_op(self):
    a = np.array([[1, 0], [0, 1]]).astype("float32")
    b = np.array([[4, 1], [2, 2]]).astype("float32")
    c = np.matmul(a, b)
    jtc = jt.matmul(jt.array(a), jt.array(b)).data
    assert np.allclose(jtc, c)

    a = np.random.random((128, 3, 10, 20))
    b = np.random.random((20, 30))
    c = np.matmul(a, b)
    jtc = jt.matmul(jt.array(a), jt.array(b)).data
    assert np.allclose(jtc, c)

    a = np.random.random((128, 3, 10, 20))
    b = np.random.random((128, 3, 20, 30))
    c = np.matmul(a, b)
    jtc = jt.matmul(jt.array(a), jt.array(b)).data
    assert np.allclose(jtc, c), np.abs(jtc - c).max()
def check(a_shape, b_shape):
    a = jt.random(a_shape)
    b = jt.random(b_shape)
    c = jt.matmul(a, b)
    cc = np.matmul(a.data, b.data)
    assert c.shape == cc.shape or (cc.shape == () and c.shape == [1]), \
        (c.shape, cc.shape)
    assert np.allclose(c.data, cc), (c.data - cc)

    da, db = jt.grad(c, [a, b])
    assert da.shape == a.shape
    assert db.shape == b.shape
def __call__(self, plane=None, quat=None, weight=1):
    reg_rot = jt.transform.to_tensor(jt.array([0]))
    reg_plane = jt.transform.to_tensor(jt.array([0]))

    if plane:
        p = [normalize(i[:, 0:3]).unsqueeze(2) for i in plane]
        x = jt.contrib.concat(p, dim=2)
        y = jt.transpose(x, [0, 2, 1])
        reg_plane = ((jt.matmul(x, y) - self.eye).pow(2).sum(2).sum(1).mean()
                     * weight)
    if quat:
        q = [i[:, 1:4].unsqueeze(2) for i in quat]
        x = jt.contrib.concat(q, dim=2)
        y = jt.transpose(x, [0, 2, 1])
        reg_rot = ((jt.matmul(x, y) - self.eye).pow(2).sum(2).sum(1).mean()
                   * weight)

    return (reg_plane, reg_rot)
def mask_iou(mask1, mask2):
    """
    Inputs are matrices of size _ x N. Output is size _1 x _2.
    Note: if iscrowd is True, then mask2 should be the crowd.
    """
    intersection = jt.matmul(mask1, mask2.transpose(1, 0))
    area1 = jt.sum(mask1, dim=1).view(1, -1)
    area2 = jt.sum(mask2, dim=1).view(1, -1)
    union = (area1.t() + area2) - intersection
    return intersection / union
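# Usage sketch (added for illustration, not from the original source): with
# flattened binary masks as rows, mask_iou is |A & B| / |A | B| per pair,
# e.g. two 2-pixel masks sharing one pixel give IoU 1/3.
def _check_mask_iou():
    import jittor as jt
    m1 = jt.float32([[1, 1, 0, 0]])  # pixels {0, 1}
    m2 = jt.float32([[0, 1, 1, 0]])  # pixels {1, 2}
    iou = mask_iou(m1, m2)           # intersection = 1, union = 3
    assert abs(iou.item() - 1.0 / 3.0) < 1e-5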
def test_matmul_example(self):
    a = jt.random([3])
    b = jt.random([3])
    c = jt.matmul(a, b)
    assert c.shape == [1]

    a = jt.random([3, 4])
    b = jt.random([4])
    c = jt.matmul(a, b)
    assert c.shape == [3]

    a = jt.random([10, 3, 4])
    b = jt.random([4])
    c = jt.matmul(a, b)
    assert c.shape == [10, 3]

    a = jt.random([10, 3, 4])
    b = jt.random([4, 5])
    c = jt.matmul(a, b)
    assert c.shape == [10, 3, 5]

    a = jt.random([10, 3, 4])
    b = jt.random([10, 4, 5])
    c = jt.matmul(a, b)
    assert c.shape == [10, 3, 5]

    a = jt.random([8, 1, 3, 4])
    b = jt.random([10, 4, 5])
    c = jt.matmul(a, b)
    assert c.shape == [8, 10, 3, 5]
def display_lincomb(proto_data, masks):
    out_masks = jt.matmul(proto_data, masks.t())

    for kdx in range(1):
        jdx = kdx + 0
        import matplotlib.pyplot as plt
        coeffs = masks[jdx].numpy()
        idx = np.argsort(-np.abs(coeffs))
        coeffs_sort = coeffs[idx]
        arr_h, arr_w = (4, 8)
        proto_h, proto_w, _ = proto_data.shape
        arr_img = np.zeros([proto_h * arr_h, proto_w * arr_w])
        arr_run = np.zeros([proto_h * arr_h, proto_w * arr_w])

        for y in range(arr_h):
            for x in range(arr_w):
                i = arr_w * y + x

                # Accumulate the coefficient-weighted prototypes in sorted order.
                if i == 0:
                    running_total = proto_data[:, :, idx[i]].numpy() * coeffs_sort[i]
                else:
                    running_total += proto_data[:, :, idx[i]].numpy() * coeffs_sort[i]

                running_total_nonlin = running_total
                if cfg.mask_proto_mask_activation == activation_func.sigmoid:
                    running_total_nonlin = 1 / (1 + np.exp(-running_total_nonlin))

                arr_img[y * proto_h:(y + 1) * proto_h,
                        x * proto_w:(x + 1) * proto_w] = (
                            proto_data[:, :, idx[i]] /
                            jt.max(proto_data[:, :, idx[i]])).numpy() * coeffs_sort[i]
                arr_run[y * proto_h:(y + 1) * proto_h,
                        x * proto_w:(x + 1) * proto_w] = (
                            running_total_nonlin > 0.5).astype(float)

        plt.imshow(arr_img)
        plt.show()
        plt.imshow(out_masks[:, :, jdx].numpy())
        plt.show()
def execute(self, trans_points, cp, voxel, gridSize, weight=1):
    if len(trans_points.shape) == 4:
        trans_points = trans_points.squeeze(dim=-1)

    nb = pointClosestCellIndex(trans_points)
    idx = jt.matmul(
        nb, jt.transform.to_tensor(jt.array([gridSize**2, gridSize, 1])))
    mask = 1 - voxel.view(-1, gridSize**3).gather(1, idx)
    idx = idx.unsqueeze(2)
    idx = idx.repeat(1, 1, 3)
    mask = mask.unsqueeze(2).repeat(1, 1, 3)
    closest_points = cp.gather(1, idx)

    self.constant = weight
    distance = trans_points - closest_points
    distance = distance * mask
    self.saved_tensors = distance
    return jt.mean(jt.sum(jt.sum(jt.pow(distance, 2), 2), 1)) * weight
def execute(self, xyz, points):
    """
    Input:
        xyz: input points position data, [B, C, N]
        points: input points data, [B, D, N]
    Return:
        new_xyz: sampled points position data, [B, C, S]
        new_points_concat: sample points feature data, [B, D', S]
    """
    B = xyz.shape[0]
    N = xyz.shape[2]
    xyz = xyz.permute(0, 2, 1)
    if points is not None:
        points = points.permute(0, 2, 1)

    xyz_density = compute_density(xyz, self.bandwidth)
    density_scale = self.densitynet(xyz_density)

    if self.group_all:
        new_xyz, new_points, grouped_xyz_norm, grouped_density = \
            sample_and_group_all(xyz, points, density_scale.reshape(B, N, 1))
    else:
        new_xyz, new_points, grouped_xyz_norm, _, grouped_density = \
            sample_and_group(self.npoint, self.nsample, xyz, points,
                             density_scale.reshape(B, N, 1))

    new_points = new_points.permute(0, 3, 2, 1)  # [B, C+D, nsample, npoint]
    for i in range(len(self.mlp_convs)):
        conv = self.mlp_convs[i]
        bn = self.mlp_bns[i]
        new_points = self.relu(bn(conv(new_points)))

    grouped_xyz = grouped_xyz_norm.permute(0, 3, 2, 1)
    weights = self.weightnet(grouped_xyz)
    new_points = new_points * grouped_density.permute(0, 3, 2, 1)
    new_points = jt.matmul(new_points.permute(0, 3, 1, 2),
                           weights.permute(0, 3, 2, 1)).reshape(B, self.npoint, -1)
    new_points = self.linear(new_points)
    new_points = self.bn_linear(new_points.permute(0, 2, 1))
    new_points = self.relu(new_points)
    new_xyz = new_xyz.permute(0, 2, 1)
    return new_xyz, new_points
def square_distance(src, dst):
    """
    Calculate Euclid distance between each two points.

    src^T * dst = xn * xm + yn * ym + zn * zm;
    sum(src^2, dim=-1) = xn*xn + yn*yn + zn*zn;
    sum(dst^2, dim=-1) = xm*xm + ym*ym + zm*zm;
    dist = (xn - xm)^2 + (yn - ym)^2 + (zn - zm)^2
         = sum(src**2, dim=-1) + sum(dst**2, dim=-1) - 2 * src^T * dst

    Input:
        src: source points, [B, N, C]
        dst: target points, [B, M, C]
    Output:
        dist: per-point square distance, [B, N, M]
    """
    B, N, _ = src.shape
    _, M, _ = dst.shape
    dist = -2 * jt.matmul(src, dst.permute(0, 2, 1))
    dist += jt.sum(src ** 2, -1).view(B, N, 1)
    dist += jt.sum(dst ** 2, -1).view(B, 1, M)
    return dist
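# Usage sketch (added for illustration, not from the original source): the
# expansion ||s - d||^2 = ||s||^2 + ||d||^2 - 2 s.d used above should agree
# with a brute-force numpy computation on random point clouds.
def _check_square_distance():
    import numpy as np
    import jittor as jt
    src, dst = jt.random((2, 5, 3)), jt.random((2, 7, 3))
    fast = square_distance(src, dst).data  # [2, 5, 7]
    diff = src.data[:, :, None, :] - dst.data[:, None, :, :]
    assert np.allclose(fast, (diff ** 2).sum(-1), atol=1e-5)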
def mask_iou(masks_a, masks_b, iscrowd=False):
    """
    Computes the pairwise mask IoU between two sets of masks of size
    [a, h, w] and [b, h, w]. The output is of size [a, b].

    Wait I thought this was "box_utils", why am I putting this in here?
    """
    masks_a = masks_a.view(masks_a.shape[0], -1).float()
    masks_b = masks_b.view(masks_b.shape[0], -1).float()

    intersection = jt.matmul(masks_a, masks_b.transpose(1, 0))
    area_a = masks_a.sum(dim=1).unsqueeze(1)
    area_b = masks_b.sum(dim=1).unsqueeze(0)

    return (intersection / (area_a + area_b - intersection)
            if not iscrowd else intersection / area_a)
def square_distance(tensor1, tensor2):
    """
    Calculate Euclid distance between each two points.

    tensor1^T * tensor2 = xn * xm + yn * ym + zn * zm;
    sum(tensor1^2, dim=-1) = xn*xn + yn*yn + zn*zn;
    sum(tensor2^2, dim=-1) = xm*xm + ym*ym + zm*zm;
    dist = (xn - xm)^2 + (yn - ym)^2 + (zn - zm)^2
         = sum(tensor1**2, dim=-1) + sum(tensor2**2, dim=-1)
           - 2 * tensor1^T * tensor2

    Input:
        tensor1: source points, [B, N, C]
        tensor2: target points, [B, M, C]
    Output:
        dist: per-point square distance, [B, N, M]
    """
    B, N, _ = tensor1.shape
    _, M, _ = tensor2.shape
    dist = -2 * jt.matmul(tensor1, tensor2.permute(0, 2, 1))
    dist += jt.sum(tensor1 ** 2, -1).view(B, N, 1)
    dist += jt.sum(tensor2 ** 2, -1).view(B, 1, M)
    return dist
def look(vertices, eye, direction=[0, 1, 0], up=None):
    """
    "Look" transformation of vertices.
    """
    if len(vertices.shape) != 3:
        raise ValueError('vertices Tensor should have 3 dimensions')

    direction = jt.array(direction).float32()
    if isinstance(eye, tuple):
        eye = jt.array(list(eye)).float32()
    else:
        eye = jt.array(eye).float32()
    if up is None:
        up = jt.array([0, 1, 0]).float32()
    if len(eye.shape) == 1:
        eye = eye.unsqueeze(0)
    if len(direction.shape) == 1:
        direction = direction.unsqueeze(0)
    if len(up.shape) == 1:
        up = up.unsqueeze(0)

    # create new axes
    z_axis = jt.normalize(direction, eps=1e-5)
    x_axis = jt.normalize(jt.cross(up, z_axis), eps=1e-5)
    y_axis = jt.normalize(jt.cross(z_axis, x_axis), eps=1e-5)

    # create rotation matrix: [bs, 3, 3]
    r = jt.contrib.concat(
        (x_axis.unsqueeze(1), y_axis.unsqueeze(1), z_axis.unsqueeze(1)), dim=1)

    # apply
    # [bs, nv, 3] -> [bs, nv, 3] -> [bs, nv, 3]
    if vertices.shape != eye.shape:
        eye = eye.unsqueeze(1)
    vertices = vertices - eye
    vertices = jt.matmul(vertices, r.transpose(0, 2, 1))

    return vertices
def look_at(vertices, eye, at=[0, 0, 0], up=[0, 1, 0]):
    """
    "Look at" transformation of vertices. The z axis is changed to (at - eye).
    Original vertices are transformed to the new axis.
    """
    if len(vertices.shape) != 3:
        raise ValueError('vertices Tensor should have 3 dimensions')

    at = jt.array(at).float32()
    up = jt.array(up).float32()
    if isinstance(eye, tuple):
        eye = jt.array(list(eye)).float32()
    else:
        eye = jt.array(eye).float32()

    batch_size = vertices.shape[0]
    if len(eye.shape) == 1:
        eye = eye.broadcast([batch_size] + eye.shape)
    if len(at.shape) == 1:
        at = at.broadcast([batch_size] + at.shape)
    if len(up.shape) == 1:
        up = up.broadcast([batch_size] + up.shape)

    # create new axes
    # eps is chosen as 1e-5 to match the chainer version
    z_axis = jt.normalize(at - eye, eps=1e-5)
    x_axis = jt.normalize(jt.cross(up, z_axis), eps=1e-5)
    y_axis = jt.normalize(jt.cross(z_axis, x_axis), eps=1e-5)

    # create rotation matrix: [bs, 3, 3]
    r = jt.contrib.concat(
        (x_axis.unsqueeze(1), y_axis.unsqueeze(1), z_axis.unsqueeze(1)), dim=1)

    # apply
    # [bs, nv, 3] -> [bs, nv, 3] -> [bs, nv, 3]
    if vertices.shape != eye.shape:
        eye = eye.unsqueeze(1)
    vertices = vertices - eye
    vertices = jt.matmul(vertices, r.transpose(0, 2, 1)[0])

    return vertices
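# Usage sketch (added for illustration, not from the original source): with
# the eye at (0, 0, -1) looking at the origin, the new z axis is (0, 0, 1),
# so a vertex at the origin lands one unit down the camera's z axis.
def _check_look_at():
    import numpy as np
    import jittor as jt
    verts = jt.zeros((1, 1, 3))  # a single vertex at the origin
    out = look_at(verts, eye=(0.0, 0.0, -1.0))
    assert np.allclose(out.data, [[[0.0, 0.0, 1.0]]], atol=1e-4)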
def execute(self, x):  # (N, K, C_out)
    """
    Applies XConv to the input data.
    :param x: (rep_pt, pts, fts) where
      - rep_pt: Representative point.
      - pts: Regional point cloud.
      - fts: Regional features such that fts[:,p_idx,:] is the feature
        associated with pts[:,p_idx,:].
    :return: Features aggregated into point rep_pt.
    """
    rep_pt, pts, fts = x  # b, n, c // b, n, k, c // b, n, k, d

    if fts is not None:
        assert rep_pt.size()[0] == pts.size()[0] == fts.size()[0]  # Check N is equal.
        assert rep_pt.size()[1] == pts.size()[1] == fts.size()[1]  # Check P is equal.
        assert pts.size()[2] == fts.size()[2] == self.K            # Check K is equal.
        assert fts.size()[3] == self.cin                           # Check C_in is equal.
    else:
        assert rep_pt.size()[0] == pts.size()[0]  # Check N is equal.
        assert rep_pt.size()[1] == pts.size()[1]  # Check P is equal.
        assert pts.size()[2] == self.K            # Check K is equal.
    assert rep_pt.size()[2] == pts.size()[3] == self.dims  # Check dims is equal.

    N = pts.size()[0]
    P = rep_pt.size()[1]  # (N, P, K, dims)
    p_center = jt.unsqueeze(rep_pt, dim=2)  # (N, P, 1, dims)

    # Move pts to local coordinate system of rep_pt.
    pts_local = pts - p_center.repeat(1, 1, self.K, 1)  # (N, P, K, dims)

    # Individually lift each point into C_mid space.
    pts_local = pts_local.permute(0, 3, 1, 2)  # N, dim, P, K
    fts_lifted0 = self.dense1(pts_local)
    fts_lifted = self.dense2(fts_lifted0)  # N, C_mid, P, K

    if fts is None:
        fts_cat = fts_lifted
    else:
        # Only permute fts when it exists; permuting before the None check
        # would crash on inputs without features.
        fts = fts.permute(0, 3, 1, 2)
        fts_cat = concat((fts_lifted, fts), 1)  # (N, C_mid + C_in, P, K)

    # Learn the (N, P, K, K) X-transformation matrix.
    X_shape = (N, P, self.K, self.K)
    x = self.x_trans_0(pts_local)
    x = self.x_trans_1(x)
    X = self.x_trans_2(x)  # N, K*K, 1, P
    X = X.permute(0, 2, 3, 1)  # N, P, 1, K*K
    X = X.view(X_shape)  # N, P, K, K

    # Weight and permute the concatenated features with the learned X.
    fts_cat = fts_cat.permute(0, 2, 3, 1)
    fts_X = jt.matmul(X, fts_cat)
    fts_p = self.end_conv(fts_X).squeeze(dim=2)
    return fts_p
def linear(x, n):
    w = jt.make_var([x.shape[-1], n], init=ops.random)
    return jt.matmul(x, w)
def linear(x, n):
    w = jt.make_var(
        [x.shape[-1], n],
        init=lambda *a: (jt.random(*a) - f32(0.5)) / f32(x.shape[-1])**f32(0.5))
    b = jt.make_var([n], init=lambda *a: jt.random(*a) - f32(0.5))
    return jt.matmul(x, w) + b
def postprocess(det_output, w, h, batch_idx=0, interpolation_mode='bilinear',
                visualize_lincomb=False, crop_masks=True, score_threshold=0):
    """
    Postprocesses the output of Yolact on testing mode into a format that
    makes sense, accounting for all the possible configuration settings.

    Args:
        - det_output: The list of dicts that Detect outputs.
        - w: The real width of the image.
        - h: The real height of the image.
        - batch_idx: If you have multiple images for this batch, the image's
          index in the batch.
        - interpolation_mode: Can be 'nearest' | 'area' | 'bilinear'
          (see jt.nn.functional.interpolate)

    Returns 4 jt Tensors (in the following order):
        - classes [num_det]: The class idx for each detection.
        - scores  [num_det]: The confidence score for each detection.
        - boxes   [num_det, 4]: The bounding box for each detection in
          absolute point form.
        - masks   [num_det, h, w]: Full image masks for each detection.
    """
    dets = det_output[batch_idx]
    net = dets['net']
    dets = dets['detection']

    if dets is None:
        return [jt.array([])] * 4  # Warning, this is 4 copies of the same thing

    if score_threshold > 0:
        keep = dets['score'] > score_threshold
        for k in dets:
            if k != 'proto':
                dets[k] = dets[k][keep]
        if dets['score'].shape[0] == 0:
            return [jt.array([])] * 4

    # Actually extract everything from dets now
    classes = dets['class']
    boxes = dets['box']
    scores = dets['score']
    masks = dets['mask']

    if cfg.mask_type == mask_type.lincomb and cfg.eval_mask_branch:
        # At this point masks is only the coefficients
        proto_data = dets['proto']

        # Test flag, do not upvote
        if cfg.mask_proto_debug:
            np.save('scripts/proto.npy', proto_data.numpy())

        if visualize_lincomb:
            display_lincomb(proto_data, masks)

        masks = jt.matmul(proto_data, masks.transpose(1, 0))
        masks = cfg.mask_proto_mask_activation(masks)

        # Crop masks before upsampling because you know why
        if crop_masks:
            masks = crop(masks, boxes)

        # Permute into the correct output shape [num_dets, proto_h, proto_w]
        masks = masks.permute(2, 0, 1)

        if cfg.use_maskiou:
            with timer.env('maskiou_net'):
                with jt.no_grad():
                    maskiou_p = net.maskiou_net(masks.unsqueeze(1))
                    maskiou_p = jt.gather(
                        maskiou_p, dim=1,
                        index=classes.unsqueeze(1)).squeeze(1)
                    if cfg.rescore_mask:
                        if cfg.rescore_bbox:
                            scores = scores * maskiou_p
                        else:
                            scores = [scores, scores * maskiou_p]

        # Scale masks up to the full image
        masks = nn.interpolate(masks.unsqueeze(0), (h, w),
                               mode=interpolation_mode,
                               align_corners=False).squeeze(0)

        # Binarize the masks
        masks = masks > 0.5

    boxes[:, 0], boxes[:, 2] = sanitize_coordinates(boxes[:, 0], boxes[:, 2],
                                                    w, cast=False)
    boxes[:, 1], boxes[:, 3] = sanitize_coordinates(boxes[:, 1], boxes[:, 3],
                                                    h, cast=False)
    boxes = boxes.int32()

    if cfg.mask_type == mask_type.direct and cfg.eval_mask_branch:
        # Upscale masks
        full_masks = jt.zeros((masks.shape[0], h, w))

        for jdx in range(masks.shape[0]):
            x1, y1, x2, y2 = boxes[jdx]

            mask_w = x2 - x1
            mask_h = y2 - y1

            # Just in case
            if mask_w * mask_h <= 0 or mask_w < 0:
                continue

            mask = masks[jdx].view(1, 1, cfg.mask_size, cfg.mask_size)
            mask = nn.interpolate(mask, (mask_h, mask_w),
                                  mode=interpolation_mode,
                                  align_corners=False)
            mask = (mask > 0.5).float()
            full_masks[jdx, y1:y2, x1:x2] = mask

        masks = full_masks

    return classes, scores, boxes, masks
def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45,
                        classes=None, agnostic=False, multi_label=True,
                        labels=()):
    """Performs Non-Maximum Suppression (NMS) on inference results

    Returns:
        detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
    """
    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    max_det = 300  # maximum number of detections per image
    max_nms = 30000  # maximum number of boxes into nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    output = [jt.zeros((0, 6))] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            l = labels[xi]
            v = jt.zeros((len(l), nc + 5))
            v[:, :4] = l[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(l)), l[:, 0].int32() + 5] = 1.0  # cls
            x = jt.contrib.concat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero().transpose(1, 0)
            x = jt.contrib.concat(
                (box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            j, conf = x[:, 5:].argmax(1, keepdims=True)
            x = jt.contrib.concat((box, conf, j.float()),
                                  1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == jt.array(classes)).any(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[0][:max_nms]]  # sort by confidence

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = jt.nms(jt.contrib.concat([boxes, scores.unsqueeze(1)], dim=1),
                   iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = jt.matmul(weights, x[:, :4]).float() / weights.sum(
                1, keepdims=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            print(f'WARNING: NMS time limit {time_limit}s exceeded')
            break  # time limit exceeded

    return output
def execute(self, xyz1, xyz2, points1, points2):
    """
    Input:
        xyz1: input points position data, [B, C, N]
        xyz2: sampled input points position data, [B, C, S]
        points1: input points data, [B, D, N]
        points2: input points data, [B, D, S]
    Return:
        new_points: upsampled points data, [B, D', N]
    """
    xyz1 = xyz1.permute(0, 2, 1)
    xyz2 = xyz2.permute(0, 2, 1)
    points1 = points1.permute(0, 2, 1)
    points2 = points2.permute(0, 2, 1)
    B, N, C = xyz1.shape
    _, S, _ = xyz2.shape

    # Interpolate with inverse-distance weights of the 3 nearest neighbors.
    dists = square_distance(xyz1, xyz2)
    idx, dists = jt.argsort(dists, dim=-1)
    dists, idx = dists[:, :, :3], idx[:, :, :3]  # [B, N, 3]

    dist_recip = 1.0 / (dists + 1e-8)
    norm = jt.sum(dist_recip, dim=2, keepdims=True)
    weight = dist_recip / norm
    interpolated_points = jt.sum(
        index_points(points2, idx) * weight.view(B, N, 3, 1), dim=2)

    xyz_density = compute_density(xyz1, self.bandwidth)
    density_scale = self.densitynet(xyz_density)

    new_xyz, new_points, grouped_xyz_norm, _, grouped_density = sample_and_group(
        N, self.nsample, xyz1, interpolated_points,
        density_scale.reshape(B, N, 1))

    new_points = new_points.permute(0, 3, 2, 1)  # [B, C+D, nsample, npoint]
    for i in range(len(self.mlp_convs)):
        conv = self.mlp_convs[i]
        bn = self.mlp_bns[i]
        new_points = self.relu(bn(conv(new_points)))

    grouped_xyz = grouped_xyz_norm.permute(0, 3, 2, 1)
    weights = self.weightnet(grouped_xyz)
    new_points = new_points * grouped_density.permute(0, 3, 2, 1)
    new_points = jt.matmul(new_points.permute(0, 3, 1, 2),
                           weights.permute(0, 3, 2, 1)).reshape(B, N, -1)
    new_points = self.linear(new_points)
    new_points = self.bn_linear(new_points.permute(0, 2, 1))
    new_points = self.relu(new_points)
    new_xyz = new_xyz.permute(0, 2, 1)
    return new_points
def test_matmul_op(self):
    a = np.array([[1, 0], [0, 1]]).astype("float32")
    b = np.array([[4, 1], [2, 2]]).astype("float32")
    c = np.matmul(a, b)
    jtc = jt.matmul(jt.array(a), jt.array(b)).data
    assert np.all(jtc == c)