def execute(self, mesh, eyes=None):
    if self.Gbuffer == "albedo":
        return mesh
    if self.Gbuffer == "normal" or self.Gbuffer == "depth":
        mesh.textures = jt.ones_like(mesh.textures)
    if self.light_mode == 'surface':
        # Per-face lighting: accumulate ambient plus directional terms,
        # evaluated at each face centroid.
        diffuseLight = jt.zeros(mesh.faces.shape)
        specularLight = jt.zeros(mesh.faces.shape)
        diffuseLight = self.ambient(diffuseLight)
        for directional in self.directionals:
            [diffuseLight, specularLight] = directional(
                diffuseLight, specularLight, mesh.surface_normals,
                (jt.sum(mesh.face_vertices, dim=2) / 3.0), eyes,
                mesh.with_specular, mesh.metallic_textures,
                mesh.roughness_textures)
        if len(mesh.textures.shape) == 4:
            mesh.textures = jt.clamp(
                mesh.textures * diffuseLight.unsqueeze(2) +
                jt.ones_like(mesh.textures) * specularLight.unsqueeze(2),
                0.0, 1.0)
        elif len(mesh.textures.shape) == 6:
            mesh.textures = jt.clamp(
                mesh.textures *
                diffuseLight.unsqueeze(2).unsqueeze(2).unsqueeze(2) +
                jt.ones_like(mesh.textures) *
                specularLight.unsqueeze(2).unsqueeze(2).unsqueeze(2),
                0.0, 1.0)
    elif self.light_mode == 'vertex':
        # Per-vertex lighting: same accumulation, evaluated at the vertices.
        diffuseLight = jt.zeros(mesh.vertices.shape)
        specularLight = jt.zeros(mesh.vertices.shape)
        diffuseLight = self.ambient(diffuseLight)
        for directional in self.directionals:
            [diffuseLight, specularLight] = directional(
                diffuseLight, specularLight, mesh.vertex_normals,
                mesh.vertices, eyes, mesh.with_specular,
                mesh.metallic_textures, mesh.roughness_textures)
        if len(mesh.textures.shape) == 4:
            mesh.textures = jt.clamp(
                mesh.textures * diffuseLight.unsqueeze(2) +
                jt.ones_like(mesh.textures) * specularLight.unsqueeze(2),
                0.0, 1.0)
        elif len(mesh.textures.shape) == 6:
            mesh.textures = jt.clamp(
                mesh.textures *
                diffuseLight.unsqueeze(2).unsqueeze(2).unsqueeze(2) +
                jt.ones_like(mesh.textures) *
                specularLight.unsqueeze(2).unsqueeze(2).unsqueeze(2),
                0.0, 1.0)
    return mesh

def sample_pdf(bins, weights, N_samples, det=False):
    # Get pdf
    weights = weights + 1e-5  # prevent nans
    pdf = weights / jt.sum(weights, -1, keepdims=True)
    cdf = jt.cumsum(pdf, -1)
    cdf = jt.concat([jt.zeros_like(cdf[..., :1]), cdf],
                    -1)  # (batch, len(bins))

    # Take uniform samples
    if det:
        u = jt.linspace(0., 1., steps=N_samples)
        u = u.expand(list(cdf.shape[:-1]) + [N_samples])
    else:
        u = jt.random(list(cdf.shape[:-1]) + [N_samples])

    # Invert CDF
    inds = jt.searchsorted(cdf, u, right=True)
    below = jt.maximum(jt.zeros_like(inds - 1), inds - 1)
    above = jt.minimum((cdf.shape[-1] - 1) * jt.ones_like(inds), inds)
    inds_g = jt.stack([below, above], -1)  # (batch, N_samples, 2)

    matched_shape = [inds_g.shape[0], inds_g.shape[1], cdf.shape[-1]]
    cdf_g = jt.gather(cdf.unsqueeze(1).expand(matched_shape), 2, inds_g)
    bins_g = jt.gather(bins.unsqueeze(1).expand(matched_shape), 2, inds_g)

    denom = cdf_g[..., 1] - cdf_g[..., 0]
    denom[denom < 1e-5] = 1.0
    t = (u - cdf_g[..., 0]) / denom
    samples = bins_g[..., 0] + t * (bins_g[..., 1] - bins_g[..., 0])

    return samples

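# A minimal usage sketch for sample_pdf, as in NeRF-style hierarchical
# sampling: given coarse-pass weights along each ray, draw extra depth
# samples where the density is concentrated. All shapes and values below
# are illustrative assumptions, not taken from this repo.
import jittor as jt

z_vals = jt.linspace(2.0, 6.0, 64).expand([1024, 64])  # coarse depths per ray
weights = jt.random([1024, 64])                        # stand-in coarse weights
z_mid = 0.5 * (z_vals[..., 1:] + z_vals[..., :-1])     # bin midpoints
z_fine = sample_pdf(z_mid, weights[..., 1:-1], N_samples=128, det=True)
# z_fine: (1024, 128) depths, denser where the weights were large
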
def get_rays(H, W, focal, c2w, intrinsic=None):
    i, j = jt.meshgrid(jt.linspace(0, W - 1, W), jt.linspace(0, H - 1, H))
    i = i.t()
    j = j.t()
    if intrinsic is None:
        dirs = jt.stack([(i - W * .5) / focal, (j - H * .5) / focal,
                         jt.ones_like(i)], -1).unsqueeze(-2)
    else:
        i += 0.5
        j += 0.5
        dirs = jt.stack([i, j, jt.ones_like(i)], -1).unsqueeze(-2)
        dirs = jt.sum(dirs * intrinsic[:3, :3], -1).unsqueeze(-2)
    # Rotate ray directions from camera frame to the world frame
    rays_d = jt.sum(
        dirs * c2w[:3, :3],
        -1)  # dot product, equals to: [c2w.dot(dir) for dir in dirs]
    # Translate camera frame's origin to the world frame. It is the origin of all rays.
    rays_o = c2w[:3, -1].expand(rays_d.shape)
    return rays_o, rays_d

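# A minimal usage sketch for get_rays: generate one ray per pixel of an
# 800x800 view. The focal length and the identity pose are illustrative
# assumptions.
import numpy as np
import jittor as jt

H, W, focal = 800, 800, 1111.0
c2w = jt.array(np.eye(4)[:3].astype(np.float32))  # assumed 3x4 camera-to-world
rays_o, rays_d = get_rays(H, W, focal, c2w)
# rays_o, rays_d: (H, W, 3) origins and (unnormalized) directions
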
def execute(self, locations, box_cls, box_regression, centerness,
            proposal_embed, proposal_margin, pixel_embed, image_sizes,
            targets, benchmark, timers):
    """
    Arguments:
        anchors: list[list[BoxList]]
        box_cls: list[tensor]
        box_regression: list[tensor]
        image_sizes: list[(h, w)]
    Returns:
        boxlists (list[BoxList]): the post-processed anchors, after
            applying box decoding and NMS
    """
    if benchmark and timers is not None:
        # jt.cuda.synchronize()
        timers[4].tic()
    sampled_boxes = []
    for i, (l, o, b, c) in enumerate(
            zip(locations, box_cls, box_regression, centerness)):
        em = proposal_embed[i]
        mar = proposal_margin[i]
        if self.fix_margin:
            mar = jt.ones_like(mar) * self.init_margin
        sampled_boxes.append(
            self.forward_for_single_feature_map(l, o, b, c, em, mar,
                                                image_sizes, i))
    if benchmark and timers is not None:
        timers[4].toc()
        timers[5].tic()

    boxlists = list(zip(*sampled_boxes))
    boxlists = [cat_boxlist(boxlist) for boxlist in boxlists]
    boxlists = self.select_over_all_levels(boxlists)

    if benchmark and timers is not None:
        timers[5].toc()
        timers[6].tic()

    # resize pixel embedding for higher resolution
    N, dim, m_h, m_w = pixel_embed.shape
    o_h = m_h * self.mask_scale_factor
    o_w = m_w * self.mask_scale_factor
    pixel_embed = interpolate(pixel_embed, size=(o_h, o_w),
                              mode='bilinear', align_corners=False)

    boxlists = self.forward_for_mask(boxlists, pixel_embed)

    if benchmark and timers is not None:
        timers[6].toc()

    return boxlists

def predict(self, images, score_thresh=0.7, nms_thresh=0.3):
    N = images.shape[0]
    img_size = (images.shape[-1], images.shape[-2])
    rpn_locs, rpn_scores, roi_cls_locs, roi_scores, rois, roi_indices = \
        self.execute(images)
    roi_cls_locs = roi_cls_locs.reshape(roi_cls_locs.shape[0], -1, 4)
    probs = nn.softmax(roi_scores, dim=-1)
    rois = rois.unsqueeze(1).repeat(1, self.n_class, 1)
    cls_bbox = loc2bbox(rois.reshape(-1, 4), roi_cls_locs.reshape(-1, 4))
    # clip decoded boxes to the image boundary
    cls_bbox[:, 0::2] = jt.clamp(cls_bbox[:, 0::2], min_v=0, max_v=img_size[0])
    cls_bbox[:, 1::2] = jt.clamp(cls_bbox[:, 1::2], min_v=0, max_v=img_size[1])
    cls_bbox = cls_bbox.reshape(roi_cls_locs.shape)

    results = []
    for i in range(N):
        index = jt.where(roi_indices == i)[0]
        score = probs[index, :]
        bbox = cls_bbox[index, :, :]
        boxes = []
        scores = []
        labels = []
        # skip class 0 (background); per-class score threshold then NMS
        for j in range(1, self.n_class):
            bbox_j = bbox[:, j, :]
            score_j = score[:, j]
            mask = jt.where(score_j > score_thresh)[0]
            bbox_j = bbox_j[mask, :]
            score_j = score_j[mask]
            dets = jt.contrib.concat([bbox_j, score_j.unsqueeze(1)], dim=1)
            keep = jt.nms(dets, nms_thresh)
            bbox_j = bbox_j[keep]
            score_j = score_j[keep]
            label_j = jt.ones_like(score_j).int32() * j
            boxes.append(bbox_j)
            scores.append(score_j)
            labels.append(label_j)

        boxes = jt.contrib.concat(boxes, dim=0)
        scores = jt.contrib.concat(scores, dim=0)
        labels = jt.contrib.concat(labels, dim=0)
        results.append((boxes, scores, labels))

    return results

def integrator(raw, z_vals, rays_d, raw_noise_std=0, white_bkgd=False):
    """Transforms model's predictions to semantically meaningful values.
    Args:
        raw: [num_rays, num_samples along ray, 4]. Prediction from model.
        z_vals: [num_rays, num_samples along ray]. Integration time.
        rays_d: [num_rays, 3]. Direction of each ray.
    Returns:
        rgb_map: [num_rays, 3]. Estimated RGB color of a ray.
        disp_map: [num_rays]. Disparity map. Inverse of depth map.
        acc_map: [num_rays]. Sum of weights along each ray.
        weights: [num_rays, num_samples]. Weights assigned to each sampled color.
        depth_map: [num_rays]. Estimated distance to object.
    """
    raw2alpha = lambda raw, dists, act_fn=jt.nn.relu: 1. - jt.exp(
        -act_fn(raw) * dists)

    dists = z_vals[..., 1:] - z_vals[..., :-1]
    dists = jt.concat([
        dists,
        jt.array(np.array([1e10]).astype(np.float32)).expand(
            dists[..., :1].shape)
    ], -1)  # [N_rays, N_samples]
    dists = dists * jt.norm(rays_d.unsqueeze(-2), p=2, dim=-1)

    rgb = jt.sigmoid(raw[..., :3])  # [N_rays, N_samples, 3]
    noise = 0.
    if raw_noise_std > 0.:
        noise = jt.init.gauss(raw[..., 3].shape, raw.dtype) * raw_noise_std

    alpha = raw2alpha(raw[..., 3] + noise, dists)  # [N_rays, N_samples]
    weights = alpha * jt.cumprod(
        jt.concat([jt.ones((alpha.shape[0], 1)), 1. - alpha + 1e-10], -1),
        -1)[:, :-1]
    rgb_map = jt.sum(weights.unsqueeze(-1) * rgb, -2)  # [N_rays, 3]

    depth_map = jt.sum(weights * z_vals, -1)
    disp_map = 1. / jt.maximum(1e-10 * jt.ones_like(depth_map),
                               depth_map / jt.sum(weights, -1))
    acc_map = jt.sum(weights, -1)

    if white_bkgd:
        rgb_map = rgb_map + (1. - acc_map.unsqueeze(-1))

    return rgb_map, disp_map, acc_map, weights, depth_map

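# A minimal usage sketch for integrator: alpha-composite stand-in network
# outputs along 1024 rays with 64 samples each. The tensors are random
# placeholders, assumed only to match the documented shapes.
import numpy as np
import jittor as jt

raw = jt.random([1024, 64, 4])                         # fake (r, g, b, sigma)
z_vals = jt.linspace(2.0, 6.0, 64).expand([1024, 64])  # sample depths per ray
rays_d = jt.random([1024, 3]) - 0.5                    # fake ray directions
rgb_map, disp_map, acc_map, weights, depth_map = integrator(
    raw, z_vals, rays_d, raw_noise_std=0, white_bkgd=True)
# rgb_map: (1024, 3); weights: (1024, 64), composited front to back
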
def render(H, W, focal, chunk=1024 * 32, rays=None, c2w=None, intrinsic=None,
           ndc=True, near=0., far=1., use_viewdirs=False,
           c2w_staticcam=None, **kwargs):
    """Render rays
    Args:
        H: int. Height of image in pixels.
        W: int. Width of image in pixels.
        focal: float. Focal length of pinhole camera.
        chunk: int. Maximum number of rays to process simultaneously. Used to
            control maximum memory usage. Does not affect final results.
        rays: array of shape [2, batch_size, 3]. Ray origin and direction for
            each example in batch.
        c2w: array of shape [3, 4]. Camera-to-world transformation matrix.
        ndc: bool. If True, represent ray origin, direction in NDC coordinates.
        near: float or array of shape [batch_size]. Nearest distance for a ray.
        far: float or array of shape [batch_size]. Farthest distance for a ray.
        use_viewdirs: bool. If True, use viewing direction of a point in space
            in model.
        c2w_staticcam: array of shape [3, 4]. If not None, use this
            transformation matrix for camera while using other c2w argument
            for viewing directions.
    Returns:
        rgb_map: [batch_size, 3]. Predicted RGB values for rays.
        disp_map: [batch_size]. Disparity map. Inverse of depth.
        acc_map: [batch_size]. Accumulated opacity (alpha) along a ray.
        extras: dict with everything returned by render_rays().
    """
    if c2w is not None:
        # special case to render full image
        rays_o, rays_d = pinhole_get_rays(H, W, focal, c2w, intrinsic)
    else:
        # use provided ray batch
        rays_o, rays_d = rays

    if use_viewdirs:
        # provide ray directions as input
        viewdirs = rays_d
        if c2w_staticcam is not None:
            assert intrinsic is None
            rays_o, rays_d = pinhole_get_rays(H, W, focal, c2w_staticcam)
        viewdirs = viewdirs / jt.norm(viewdirs, p=2, dim=-1, keepdim=True)
        viewdirs = jt.reshape(viewdirs, [-1, 3]).float()

    sh = rays_d.shape  # [..., 3]
    if ndc:
        # for forward facing scenes
        rays_o, rays_d = ndc_rays(H, W, focal, 1., rays_o, rays_d)

    # Create ray batch
    rays_o = jt.reshape(rays_o, [-1, 3]).float()
    rays_d = jt.reshape(rays_d, [-1, 3]).float()

    near, far = near * jt.ones_like(rays_d[..., :1]), far * jt.ones_like(
        rays_d[..., :1])
    rays = jt.concat([rays_o, rays_d, near, far], -1)
    if use_viewdirs:
        rays = jt.concat([rays, viewdirs], -1)

    # Render and reshape
    all_ret = batchify_rays(rays, chunk, **kwargs)
    for k in all_ret:
        k_sh = list(sh[:-1]) + list(all_ret[k].shape[1:])
        all_ret[k] = jt.reshape(all_ret[k], k_sh)

    k_extract = ['rgb_map', 'disp_map', 'acc_map']
    ret_list = [all_ret[k] for k in k_extract]
    ret_dict = {k: all_ret[k] for k in all_ret if k not in k_extract}
    return ret_list + [ret_dict]

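# A hypothetical call to render for one full 400x400 view. The pose and
# focal length are illustrative, and render_kwargs stands in for the
# model/sampling arguments (network functions, sample counts, ...) that
# the training setup would normally build; it is an assumption here, not
# something defined in this file.
import numpy as np
import jittor as jt

pose = jt.array(np.eye(4)[:3].astype(np.float32))  # assumed camera-to-world
rgb, disp, acc, extras = render(400, 400, focal=555.0, chunk=1024 * 32,
                                c2w=pose, ndc=False, near=2., far=6.,
                                use_viewdirs=True, **render_kwargs)
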
def output_masked_image(output_dir, selected_img, ori_img, mask, sidelength,
                        channels):
    # Paint the selected pixels onto a white canvas and save the result.
    masked_image = jittor.ones_like(ori_img)
    masked_image[mask] = selected_img
    masked_image = form_image(masked_image, sidelength, channels)
    masked_image.save(os.path.join(output_dir, "masked.jpg"))

def build_targets(p, targets, model):
    # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
    det = model.model[-1]  # Detect() module
    na, nt = det.na, targets.shape[0]  # number of anchors, targets
    tcls, tbox, indices, anch = [], [], [], []
    gain = jt.ones((7, ))  # normalized to gridspace gain
    ai = jt.index((na, ), dim=0).float().view(na, 1).repeat(
        1, nt)  # same as .repeat_interleave(nt)
    targets = jt.contrib.concat((targets.repeat(na, 1, 1), ai[:, :, None]),
                                2)  # append anchor indices

    g = 0.5  # bias
    off = jt.array([
        [0, 0],
        # [1, 0], [0, 1], [-1, 0], [0, -1],  # j,k,l,m
        # [1, 1], [1, -1], [-1, 1], [-1, -1],  # jk,jm,lk,lm
    ]).float() * g  # offsets

    for i in range(det.nl):
        anchors = det.anchors[i]
        gain[2:6] = jt.array(
            [p[i].shape[3], p[i].shape[2], p[i].shape[3],
             p[i].shape[2]])  # xyxy gain

        # Match targets to anchors
        t = targets * gain
        if nt:
            # Matches
            r = t[:, :, 4:6] / anchors[:, None]  # wh ratio
            j = jt.maximum(r, 1. / r).max(2) < model.hyp['anchor_t']  # compare
            # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
            t = t[j]  # filter

            # Offsets
            gxy = t[:, 2:4]  # grid xy
            gxi = gain[jt.array([2, 3])] - gxy  # inverse
            # j, k = jt.logical_and((gxy % 1. < g), (gxy > 1.)).int().transpose(1,0).bool()
            # l, m = jt.logical_and((gxi % 1. < g),(gxi > 1.)).int().transpose(1,0).bool()
            jk = jt.logical_and((gxy % 1. < g), (gxy > 1.))
            lm = jt.logical_and((gxi % 1. < g), (gxi > 1.))
            j, k = jk[:, 0], jk[:, 1]
            l, m = lm[:, 0], lm[:, 1]
            j = jt.stack((jt.ones_like(j), ))
            t = t.repeat((off.shape[0], 1, 1))[j]
            offsets = (jt.zeros_like(gxy)[None] + off[:, None])[j]
        else:
            t = targets[0]
            offsets = 0

        # Define
        b = t[:, 0].int32()
        c = t[:, 1].int32()  # image, class
        gxy = t[:, 2:4]  # grid xy
        gwh = t[:, 4:6]  # grid wh
        gij = (gxy - offsets).int32()
        gi, gj = gij[:, 0], gij[:, 1]  # grid xy indices

        # Append
        a = t[:, 6].int32()  # anchor indices
        indices.append((b, a, gj.clamp(0, gain[3] - 1),
                        gi.clamp(0, gain[2] - 1)))  # image, anchor, grid indices
        tbox.append(jt.contrib.concat((gxy - gij, gwh), 1))  # box
        anch.append(anchors[a])  # anchors
        tcls.append(c)  # class

    return tcls, tbox, indices, anch