def transform(self, skels):
    d_skels = skels[:, 1:, :] - skels[:, :-1, :]
    segment_len = (d_skels**2).sum(2).sqrt().mean(1)
    angles = torch.atan2(d_skels[:, :, 0], d_skels[:, :, 1])
    self._unwrap_t(angles)
    mean_angle = angles.mean(1)
    angles -= angles.mean(1).view(-1, 1)
    eigenworms = torch.matmul(angles, self.eigen_components.t())
    head_coords = skels[:, 0, :].contiguous()
    return head_coords, segment_len, mean_angle, eigenworms
def decode(sin_t, cos_t):
    theta = torch.atan2(sin_t.float(), cos_t.float())
    return theta
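# A minimal usage sketch (added for illustration, not from the original
# source): `decode` recovers an angle from its (sin, cos) encoding, a common
# trick for regressing periodic targets without a wrap-around discontinuity.
import torch

theta = torch.tensor([0.3, -2.0, 3.1])
theta_hat = decode(torch.sin(theta), torch.cos(theta))
assert torch.allclose(theta, theta_hat, atol=1e-5)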
def points_xyz_to_cylinder(points_xyz):
    points_x, points_y, points_z = torch.unbind(points_xyz, dim=-1)
    points_rho = torch.sqrt(points_x**2 + points_y**2)
    points_phi = torch.atan2(points_y, points_x)
    points_cylinder = torch.stack([points_phi, points_z, points_rho], dim=-1)
    return points_cylinder
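# Hedged round-trip check (added for illustration): converting back from the
# (phi, z, rho) cylinder layout used above. `points_cylinder_to_xyz` is a
# hypothetical helper, not part of the original source.
import torch

def points_cylinder_to_xyz(points_cylinder):
    phi, z, rho = torch.unbind(points_cylinder, dim=-1)
    return torch.stack([rho * torch.cos(phi), rho * torch.sin(phi), z], dim=-1)

pts = torch.randn(4, 3)
assert torch.allclose(pts, points_cylinder_to_xyz(points_xyz_to_cylinder(pts)), atol=1e-5)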
def extract_descriptor(self, images):
    """
    Main function of this class, which extracts the descriptors from a
    batch of images.

    Args:
        images : list of 2D array of int or float.
            List of images to form the batch. All images must be grayscale
            (only two dimensions) and their values are assumed to be in the
            interval [0, 255]. All images must have the same size.

    Returns:
        (The explanation below is adapted from scikit-image.)
        descs : 4D array of floats
            Grid of DAISY descriptors for the given image as an array
            with dimensionality (N, R, P, Q) where
            ``N = len(images)``
            ``R = (rings * histograms + 1) * orientations``
            ``P = ceil((M - radius*2) / step)``
            ``Q = ceil((N - radius*2) / step)``
    """
    images = np.stack(images, axis=0)[:, None]
    images = torch.from_numpy(images.astype(np.float32)) / 255.0
    if self.fp16:
        images = images.half()
    else:
        images = images.float()
    if self.cuda:
        images = images.cuda()

    self.batch_size = images.shape[0]
    self.max_batch_size = max(self.max_batch_size, self.batch_size)
    if (self.dx is None
            or self.dx.shape[0] < self.max_batch_size
            or self.dx.shape[2] != images.shape[2]
            or self.dx.shape[3] != images.shape[3]):
        shape = (self.max_batch_size,) + images.shape[1:]
        self.dx = torch.zeros(shape)
        if self.fp16:
            self.dx = self.dx.half()
        else:
            self.dx = self.dx.float()
        if self.cuda:
            self.dx = self.dx.cuda()
        self.dy = torch.zeros(shape)
        if self.fp16:
            self.dy = self.dy.half()
        else:
            self.dy = self.dy.float()
        if self.cuda:
            self.dy = self.dy.cuda()

    dx = self.dx[:self.batch_size]
    dx[:, :, :, :-1] = images[:, :, :, 1:] - images[:, :, :, :-1]
    dy = self.dy[:self.batch_size]
    dy[:, :, :-1, :] = images[:, :, 1:, :] - images[:, :, :-1, :]

    # Compute gradient orientation and magnitude and their contribution
    # to the histograms.
    grad_mag = torch.sqrt(dx ** 2 + dy ** 2)
    grad_ori = torch.atan2(dy, dx)
    hist = torch.exp(self.orientation_kappa * torch.cos(
        grad_ori - self.orientation_angles))
    hist *= grad_mag

    # Smooth orientation histograms for the center and all rings.
    hist_smooth = self._compute_ring_histograms(hist)

    # Assemble descriptor grid.
    theta = np.array([2 * np.pi * j / self.histograms
                      for j in range(self.histograms)])
    desc_dims = (self.rings * self.histograms + 1) * self.orientations
    desc_shape = (self.max_batch_size, desc_dims,
                  images.shape[2] - 2 * self.radius,
                  images.shape[3] - 2 * self.radius)
    if self.descs is None or self.descs.shape != desc_shape:
        self.descs = torch.empty(desc_shape)
        if self.fp16:
            self.descs = self.descs.half()
        else:
            self.descs = self.descs.float()
        if self.cuda:
            self.descs = self.descs.cuda()
    descs = self.descs[:self.batch_size]
    descs[:, :self.orientations, :, :] = hist_smooth[
        :, 0, :, self.radius:-self.radius, self.radius:-self.radius]

    idx = self.orientations
    cos_theta = np.cos(theta)
    sin_theta = np.sin(theta)
    for i in range(self.rings):
        for j in range(self.histograms):
            y_min = self.radius + int(round(
                self.ring_radii[i] * sin_theta[j]))
            y_max = descs.shape[2] + y_min
            x_min = self.radius + int(round(
                self.ring_radii[i] * cos_theta[j]))
            x_max = descs.shape[3] + x_min
            descs[:, idx:idx + self.orientations, :, :] = hist_smooth[
                :, i + 1, :, y_min:y_max, x_min:x_max]
            idx += self.orientations
    descs = descs[:, :, ::self.step, ::self.step]

    # Normalize descriptors.
    if self.normalization != 'off':
        if self.fp16:
            descs += 1e-3
        else:
            descs += 1e-10
        if self.normalization == 'l1':
            descs /= torch.sum(descs, dim=1, keepdim=True)
        elif self.normalization == 'l2':
            descs /= torch.sqrt(torch.sum(
                torch.pow(descs, 2), dim=1, keepdim=True))
        elif self.normalization == 'daisy':
            for i in range(0, desc_dims, self.orientations):
                norms = torch.sqrt(torch.sum(
                    torch.pow(descs[:, i:i + self.orientations], 2),
                    dim=1, keepdim=True))
                descs[:, i:i + self.orientations] /= norms

    if self.return_numpy:
        descs = descs.detach().cpu().numpy()
    return descs
def main():
    args = parse_args()

    with open('models/pose/%s/config.yml' % args.pose_name, 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    print('-' * 20)
    for key in config.keys():
        print('%s: %s' % (key, str(config[key])))
    print('-' * 20)

    cudnn.benchmark = True

    df = pd.read_csv('inputs/train.csv')
    img_ids = df['ImageId'].values
    img_paths = np.array('inputs/train_images/' + df['ImageId'].values + '.jpg')
    mask_paths = np.array('inputs/train_masks/' + df['ImageId'].values + '.jpg')
    labels = np.array([convert_str_to_labels(s) for s in df['PredictionString']])

    with open('outputs/decoded/val/%s.json' % args.det_name, 'r') as f:
        dets = json.load(f)

    if config['rot'] == 'eular':
        num_outputs = 3
    elif config['rot'] == 'trig':
        num_outputs = 6
    elif config['rot'] == 'quat':
        num_outputs = 4
    else:
        raise NotImplementedError

    test_transform = Compose([
        transforms.Resize(config['input_w'], config['input_h']),
        transforms.Normalize(),
        ToTensor(),
    ])

    det_df = {
        'ImageId': [],
        'img_path': [],
        'det': [],
        'mask': [],
    }

    name = '%s_%.2f' % (args.det_name, args.score_th)
    if args.nms:
        name += '_nms%.2f' % args.nms_th
    output_dir = 'processed/pose_images/val/%s' % name
    os.makedirs(output_dir, exist_ok=True)

    df = []
    kf = KFold(n_splits=config['n_splits'], shuffle=True, random_state=41)
    for fold, (train_idx, val_idx) in enumerate(kf.split(img_paths)):
        print('Fold [%d/%d]' % (fold + 1, config['n_splits']))

        # create model
        model = get_pose_model(config['arch'],
                               num_outputs=num_outputs,
                               freeze_bn=config['freeze_bn'])
        model = model.cuda()

        model_path = 'models/pose/%s/model_%d.pth' % (config['name'], fold + 1)
        if not os.path.exists(model_path):
            print('%s does not exist.' % model_path)
            continue
        model.load_state_dict(torch.load(model_path))
        model.eval()

        val_img_ids = img_ids[val_idx]
        val_img_paths = img_paths[val_idx]

        fold_det_df = {
            'ImageId': [],
            'img_path': [],
            'det': [],
            'mask': [],
        }
        for img_id, img_path in tqdm(zip(val_img_ids, val_img_paths),
                                     total=len(val_img_ids)):
            img = cv2.imread(img_path)
            height, width = img.shape[:2]

            det = np.array(dets[img_id])
            det = det[det[:, 6] > args.score_th]
            if args.nms:
                det = nms(det, dist_th=args.nms_th)

            for k in range(len(det)):
                pitch, yaw, roll, x, y, z, score, w, h = det[k]
                fold_det_df['ImageId'].append(img_id)
                fold_det_df['det'].append(det[k])
                output_path = '%s_%d.jpg' % (img_id, k)
                fold_det_df['img_path'].append(output_path)

                x, y = convert_3d_to_2d(x, y, z)
                w *= 1.1
                h *= 1.1
                xmin = int(round(x - w / 2))
                xmax = int(round(x + w / 2))
                ymin = int(round(y - h / 2))
                ymax = int(round(y + h / 2))

                cropped_img = img[ymin:ymax, xmin:xmax]
                if cropped_img.shape[0] > 0 and cropped_img.shape[1] > 0:
                    cv2.imwrite(os.path.join(output_dir, output_path), cropped_img)
                    fold_det_df['mask'].append(1)
                else:
                    fold_det_df['mask'].append(0)
        fold_det_df = pd.DataFrame(fold_det_df)

        test_set = PoseDataset(
            output_dir + '/' + fold_det_df['img_path'].values,
            fold_det_df['det'].values,
            transform=test_transform,
            masks=fold_det_df['mask'].values)
        test_loader = torch.utils.data.DataLoader(
            test_set,
            batch_size=config['batch_size'],
            shuffle=False,
            num_workers=config['num_workers'],
        )

        fold_dets = []
        with torch.no_grad():
            for input, batch_det, mask in tqdm(test_loader, total=len(test_loader)):
                input = input.cuda()
                batch_det = batch_det.numpy()
                mask = mask.numpy()
                output = model(input)
                output = output.cpu()

                if config['rot'] == 'trig':
                    yaw = torch.atan2(output[..., 1:2], output[..., 0:1])
                    pitch = torch.atan2(output[..., 3:4], output[..., 2:3])
                    roll = torch.atan2(output[..., 5:6], output[..., 4:5])
                    roll = rotate(roll, -np.pi)

                pitch = pitch.cpu().numpy()[:, 0]
                yaw = yaw.cpu().numpy()[:, 0]
                roll = roll.cpu().numpy()[:, 0]

                batch_det[mask, 0] = pitch[mask]
                batch_det[mask, 1] = yaw[mask]
                batch_det[mask, 2] = roll[mask]
                fold_dets.append(batch_det)

        fold_dets = np.vstack(fold_dets)
        fold_det_df['det'] = fold_dets.tolist()
        fold_det_df = fold_det_df.groupby('ImageId')['det'].apply(list)
        fold_det_df = pd.DataFrame({
            'ImageId': fold_det_df.index.values,
            'PredictionString': fold_det_df.values,
        })
        df.append(fold_det_df)
        break

    df = pd.concat(df).reset_index(drop=True)
    for i in tqdm(range(len(df))):
        img_id = df.loc[i, 'ImageId']
        det = np.array(df.loc[i, 'PredictionString'])
        if args.show:
            img = cv2.imread('inputs/train_images/%s.jpg' % img_id)
            img_pred = visualize(img, det)
            plt.imshow(img_pred[..., ::-1])
            plt.show()
        df.loc[i, 'PredictionString'] = convert_labels_to_str(det[:, :7])

    name += '_%s' % args.pose_name
    df.to_csv('outputs/submissions/val/%s.csv' % name, index=False)
def trans_no_rotation(self, state):
    """
    Transform the coordinates to be agent-centric.
    The input tuple contains a robot state tensor and a human state tensor.
    The robot state tensor has size (batch_size, number, state_length) (for example 100*1*9).
    The human state tensor has size (batch_size, number, state_length) (for example 100*5*5).
    """
    # for robot
    # 'px', 'py', 'vx', 'vy', 'radius', 'gx', 'gy', 'v_pref', 'theta'
    #  0     1     2     3     4         5     6     7         8
    # for human
    # 'px', 'py', 'vx', 'vy', 'radius'
    #  0     1     2     3     4
    assert len(state[0].shape) == 3
    if state[1] is None:
        robot_state = state[0]
        dx = robot_state[:, :, 5] - robot_state[:, :, 0]
        dy = robot_state[:, :, 6] - robot_state[:, :, 1]
        dx = dx.unsqueeze(1)
        dy = dy.unsqueeze(1)
        radius_r = robot_state[:, :, 4].unsqueeze(1)
        dg = torch.norm(torch.cat([dx, dy], dim=2), 2, dim=2, keepdim=True)
        rot = torch.atan2(dy, dx)
        vx = robot_state[:, :, 2].unsqueeze(1)
        vy = robot_state[:, :, 3].unsqueeze(1)
        v_pref = robot_state[:, :, 7].unsqueeze(1)
        px_r = torch.zeros_like(v_pref)
        py_r = torch.zeros_like(v_pref)
        theta = robot_state[:, :, 8].unsqueeze(1)
        new_robot_state = torch.cat((px_r, py_r, vx, vy, radius_r, dg, rot,
                                     v_pref, theta), dim=2)
        new_state = (new_robot_state, None)
        return new_state
    else:
        batch = state[0].shape[0]
        robot_state = state[0]
        human_state = state[1]
        human_num = state[1].shape[1]
        dx = robot_state[:, :, 5] - robot_state[:, :, 0]
        dy = robot_state[:, :, 6] - robot_state[:, :, 1]
        dx = dx.unsqueeze(1)
        dy = dy.unsqueeze(1)
        radius_r = robot_state[:, :, 4].unsqueeze(1)
        dg = torch.norm(torch.cat([dx, dy], dim=2), 2, dim=2, keepdim=True)
        rot = torch.atan2(dy, dx)
        vx = robot_state[:, :, 2].unsqueeze(1)
        vy = robot_state[:, :, 3].unsqueeze(1)
        v_pref = robot_state[:, :, 7].unsqueeze(1)
        px_r = torch.zeros_like(v_pref)
        py_r = torch.zeros_like(v_pref)
        theta = robot_state[:, :, 8].unsqueeze(1)
        new_robot_state = torch.cat((px_r, py_r, vx, vy, radius_r, dg, rot,
                                     v_pref, theta), dim=2)
        new_human_state = None
        for i in range(human_num):
            dx1 = human_state[:, i, 0].unsqueeze(1) - robot_state[:, :, 0]
            dy1 = human_state[:, i, 1].unsqueeze(1) - robot_state[:, :, 1]
            dx1 = dx1.unsqueeze(1).reshape((batch, 1, -1))
            dy1 = dy1.unsqueeze(1).reshape((batch, 1, -1))
            vx1 = (human_state[:, i, 2].unsqueeze(1).unsqueeze(2)).reshape((batch, 1, -1))
            vy1 = (human_state[:, i, 3].unsqueeze(1).unsqueeze(2)).reshape((batch, 1, -1))
            radius_h = human_state[:, i, 4].unsqueeze(1).unsqueeze(2)
            cur_human_state = torch.cat((dx1, dy1, vx1, vy1, radius_h), dim=2)
            if new_human_state is None:
                new_human_state = cur_human_state
            else:
                new_human_state = torch.cat((new_human_state, cur_human_state), dim=1)
        new_state = (new_robot_state, new_human_state)
        return new_state
def forward(self, inputs, lens=None):
    specs = self.stft(inputs)
    real = specs[:, :self.fft_len // 2 + 1]
    imag = specs[:, self.fft_len // 2 + 1:]
    spec_mags = torch.sqrt(real**2 + imag**2 + 1e-8)
    spec_phase = torch.atan2(imag, real)
    cspecs = torch.stack([real, imag], 1)
    cspecs = cspecs[:, :, 1:]

    out = cspecs
    encoder_out = []
    for idx, layer in enumerate(self.encoder):
        out = layer(out)
        encoder_out.append(out)

    batch_size, channels, dims, lengths = out.size()
    out = out.permute(3, 0, 1, 2)
    if self.use_clstm:
        r_rnn_in = out[:, :, :channels // 2]
        i_rnn_in = out[:, :, channels // 2:]
        r_rnn_in = torch.reshape(
            r_rnn_in, [lengths, batch_size, channels // 2 * dims])
        i_rnn_in = torch.reshape(
            i_rnn_in, [lengths, batch_size, channels // 2 * dims])
        r_rnn_in, i_rnn_in = self.enhance([r_rnn_in, i_rnn_in])
        r_rnn_in = torch.reshape(
            r_rnn_in, [lengths, batch_size, channels // 2, dims])
        i_rnn_in = torch.reshape(
            i_rnn_in, [lengths, batch_size, channels // 2, dims])
        out = torch.cat([r_rnn_in, i_rnn_in], 2)
    else:
        # to [L, B, C, D]
        out = torch.reshape(out, [lengths, batch_size, channels * dims])
        out, _ = self.enhance(out)
        out = self.tranform(out)
        out = torch.reshape(out, [lengths, batch_size, channels, dims])

    out = out.permute(1, 2, 3, 0)
    for idx in range(len(self.decoder)):
        out = complex_cat([out, encoder_out[-1 - idx]], 1)
        out = self.decoder[idx](out)
        out = out[..., 1:]

    mask_real = out[:, 0]
    mask_imag = out[:, 1]
    mask_real = F.pad(mask_real, [0, 0, 1, 0])
    mask_imag = F.pad(mask_imag, [0, 0, 1, 0])

    if self.masking_mode == 'E':
        mask_mags = (mask_real**2 + mask_imag**2)**0.5
        real_phase = mask_real / (mask_mags + 1e-8)
        imag_phase = mask_imag / (mask_mags + 1e-8)
        mask_phase = torch.atan2(imag_phase, real_phase)
        mask_mags = torch.tanh(mask_mags)
        est_mags = mask_mags * spec_mags
        est_phase = spec_phase + mask_phase
        real = est_mags * torch.cos(est_phase)
        imag = est_mags * torch.sin(est_phase)
    elif self.masking_mode == 'C':
        real, imag = real * mask_real - imag * mask_imag, \
                     real * mask_imag + imag * mask_real
    elif self.masking_mode == 'R':
        real, imag = real * mask_real, imag * mask_imag

    out_spec = torch.cat([real, imag], 1)
    out_wav = self.istft(out_spec)
    out_wav = torch.squeeze(out_wav, 1)
    # clamp_ is used as an in-place operation to keep the waveform in [-1, 1]
    out_wav = torch.clamp_(out_wav, -1, 1)
    return out_spec, out_wav
def _get_target_single(self, gt_bboxes, gt_labels, gt_bboxes_3d,
                       gt_labels_3d, centers2d, depths, attr_labels,
                       points, regress_ranges, num_points_per_lvl):
    """Compute regression and classification targets for a single image."""
    num_points = points.size(0)
    num_gts = gt_labels.size(0)
    if not isinstance(gt_bboxes_3d, torch.Tensor):
        gt_bboxes_3d = gt_bboxes_3d.tensor.to(gt_bboxes.device)
    if num_gts == 0:
        return gt_labels.new_full((num_points,), self.background_label), \
               gt_bboxes.new_zeros((num_points, 4)), \
               gt_labels_3d.new_full((num_points,), self.background_label), \
               gt_bboxes_3d.new_zeros((num_points, self.bbox_code_size)), \
               gt_bboxes_3d.new_zeros((num_points,)), \
               attr_labels.new_full((num_points,), self.attr_background_label)

    # change orientation to local yaw
    gt_bboxes_3d[..., 6] = -torch.atan2(
        gt_bboxes_3d[..., 0], gt_bboxes_3d[..., 2]) + gt_bboxes_3d[..., 6]

    areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * (
        gt_bboxes[:, 3] - gt_bboxes[:, 1])
    areas = areas[None].repeat(num_points, 1)
    regress_ranges = regress_ranges[:, None, :].expand(num_points, num_gts, 2)
    gt_bboxes = gt_bboxes[None].expand(num_points, num_gts, 4)
    centers2d = centers2d[None].expand(num_points, num_gts, 2)
    gt_bboxes_3d = gt_bboxes_3d[None].expand(num_points, num_gts,
                                             self.bbox_code_size)
    depths = depths[None, :, None].expand(num_points, num_gts, 1)
    xs, ys = points[:, 0], points[:, 1]
    xs = xs[:, None].expand(num_points, num_gts)
    ys = ys[:, None].expand(num_points, num_gts)

    delta_xs = (xs - centers2d[..., 0])[..., None]
    delta_ys = (ys - centers2d[..., 1])[..., None]
    bbox_targets_3d = torch.cat(
        (delta_xs, delta_ys, depths, gt_bboxes_3d[..., 3:]), dim=-1)

    left = xs - gt_bboxes[..., 0]
    right = gt_bboxes[..., 2] - xs
    top = ys - gt_bboxes[..., 1]
    bottom = gt_bboxes[..., 3] - ys
    bbox_targets = torch.stack((left, top, right, bottom), -1)

    assert self.center_sampling is True, 'Setting center_sampling to '\
        'False has not been implemented for FCOS3D.'
    # condition1: inside a `center bbox`
    radius = self.center_sample_radius
    center_xs = centers2d[..., 0]
    center_ys = centers2d[..., 1]
    center_gts = torch.zeros_like(gt_bboxes)
    stride = center_xs.new_zeros(center_xs.shape)

    # project the points on current lvl back to the `original` sizes
    lvl_begin = 0
    for lvl_idx, num_points_lvl in enumerate(num_points_per_lvl):
        lvl_end = lvl_begin + num_points_lvl
        stride[lvl_begin:lvl_end] = self.strides[lvl_idx] * radius
        lvl_begin = lvl_end

    center_gts[..., 0] = center_xs - stride
    center_gts[..., 1] = center_ys - stride
    center_gts[..., 2] = center_xs + stride
    center_gts[..., 3] = center_ys + stride

    cb_dist_left = xs - center_gts[..., 0]
    cb_dist_right = center_gts[..., 2] - xs
    cb_dist_top = ys - center_gts[..., 1]
    cb_dist_bottom = center_gts[..., 3] - ys
    center_bbox = torch.stack(
        (cb_dist_left, cb_dist_top, cb_dist_right, cb_dist_bottom), -1)
    inside_gt_bbox_mask = center_bbox.min(-1)[0] > 0

    # condition2: limit the regression range for each location
    max_regress_distance = bbox_targets.max(-1)[0]
    inside_regress_range = (
        (max_regress_distance >= regress_ranges[..., 0])
        & (max_regress_distance <= regress_ranges[..., 1]))

    # center-based criterion to deal with ambiguity
    dists = torch.sqrt(torch.sum(bbox_targets_3d[..., :2]**2, dim=-1))
    dists[inside_gt_bbox_mask == 0] = INF
    dists[inside_regress_range == 0] = INF
    min_dist, min_dist_inds = dists.min(dim=1)

    labels = gt_labels[min_dist_inds]
    labels_3d = gt_labels_3d[min_dist_inds]
    attr_labels = attr_labels[min_dist_inds]
    labels[min_dist == INF] = self.background_label  # set as BG
    labels_3d[min_dist == INF] = self.background_label  # set as BG
    attr_labels[min_dist == INF] = self.attr_background_label

    bbox_targets = bbox_targets[range(num_points), min_dist_inds]
    bbox_targets_3d = bbox_targets_3d[range(num_points), min_dist_inds]
    relative_dists = torch.sqrt(
        torch.sum(bbox_targets_3d[..., :2]**2, dim=-1)) / (
            1.414 * stride[:, 0])
    # [N, 1] / [N, 1]
    centerness_targets = torch.exp(-self.centerness_alpha * relative_dists)

    return labels, bbox_targets, labels_3d, bbox_targets_3d, \
        centerness_targets, attr_labels
def phase(self):
    return torch.atan2(self.imag, self.real)
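# Illustrative check (added; assumes `phase` belongs to a complex-tensor
# wrapper with `.real` and `.imag` attributes): atan2(imag, real) matches
# torch.angle on the equivalent native complex tensor.
import torch

real = torch.tensor([1.0, 0.0, -1.0])
imag = torch.tensor([0.0, 1.0, 0.0])
assert torch.allclose(torch.atan2(imag, real),
                      torch.angle(torch.complex(real, imag)))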
def backward(ctx, grad_output):
    w_gt, h_gt, w, h = ctx.w_gt, ctx.h_gt, ctx.w, ctx.h
    arc = 8 * (torch.atan2(w_gt, h_gt) - torch.atan2(w, h)) / (np.pi**2)
    return -h_gt * arc, w_gt * arc, h * arc, w * arc
def _get_bboxes_single(self,
                       cls_scores,
                       bbox_preds,
                       dir_cls_preds,
                       attr_preds,
                       centernesses,
                       mlvl_points,
                       input_meta,
                       cfg,
                       rescale=False):
    """Transform outputs for a single batch item into bbox predictions.

    Args:
        cls_scores (list[Tensor]): Box scores for a single scale level
            with shape (num_points * num_classes, H, W).
        bbox_preds (list[Tensor]): Box energies / deltas for a single
            scale level with shape (num_points * bbox_code_size, H, W).
        dir_cls_preds (list[Tensor]): Box scores for direction class
            predictions on a single scale level with shape
            (num_points * 2, H, W).
        attr_preds (list[Tensor]): Attribute scores for each scale level
            with shape (N, num_points * num_attrs, H, W).
        centernesses (list[Tensor]): Centerness for a single scale level
            with shape (num_points, H, W).
        mlvl_points (list[Tensor]): Box reference for a single scale level
            with shape (num_total_points, 2).
        input_meta (dict): Metadata of input image.
        cfg (mmcv.Config): Test / postprocessing configuration. If None,
            test_cfg would be used.
        rescale (bool): If True, return boxes in original image space.

    Returns:
        tuple[Tensor]: Predicted 3D boxes, scores, labels and attributes.
    """
    view = np.array(input_meta['cam2img'])
    scale_factor = input_meta['scale_factor']
    cfg = self.test_cfg if cfg is None else cfg
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
    mlvl_centers2d = []
    mlvl_bboxes = []
    mlvl_scores = []
    mlvl_dir_scores = []
    mlvl_attr_scores = []
    mlvl_centerness = []

    for cls_score, bbox_pred, dir_cls_pred, attr_pred, centerness, \
            points in zip(cls_scores, bbox_preds, dir_cls_preds,
                          attr_preds, centernesses, mlvl_points):
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
        scores = cls_score.permute(1, 2, 0).reshape(
            -1, self.cls_out_channels).sigmoid()
        dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)
        dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]
        attr_pred = attr_pred.permute(1, 2, 0).reshape(-1, self.num_attrs)
        attr_score = torch.max(attr_pred, dim=-1)[1]
        centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()

        bbox_pred = bbox_pred.permute(1, 2, 0).reshape(
            -1, sum(self.group_reg_dims))
        bbox_pred = bbox_pred[:, :self.bbox_code_size]
        nms_pre = cfg.get('nms_pre', -1)
        if nms_pre > 0 and scores.shape[0] > nms_pre:
            max_scores, _ = (scores * centerness[:, None]).max(dim=1)
            _, topk_inds = max_scores.topk(nms_pre)
            points = points[topk_inds, :]
            bbox_pred = bbox_pred[topk_inds, :]
            scores = scores[topk_inds, :]
            dir_cls_pred = dir_cls_pred[topk_inds, :]
            centerness = centerness[topk_inds]
            dir_cls_score = dir_cls_score[topk_inds]
            attr_score = attr_score[topk_inds]
        # change the offset to actual center predictions
        bbox_pred[:, :2] = points - bbox_pred[:, :2]
        if rescale:
            bbox_pred[:, :2] /= bbox_pred[:, :2].new_tensor(scale_factor)
        pred_center2d = bbox_pred[:, :3].clone()
        bbox_pred[:, :3] = self.pts2Dto3D(bbox_pred[:, :3], view)
        mlvl_centers2d.append(pred_center2d)
        mlvl_bboxes.append(bbox_pred)
        mlvl_scores.append(scores)
        mlvl_dir_scores.append(dir_cls_score)
        mlvl_attr_scores.append(attr_score)
        mlvl_centerness.append(centerness)

    mlvl_centers2d = torch.cat(mlvl_centers2d)
    mlvl_bboxes = torch.cat(mlvl_bboxes)
    mlvl_dir_scores = torch.cat(mlvl_dir_scores)

    # change local yaw to global yaw for 3D nms
    if mlvl_bboxes.shape[0] > 0:
        dir_rot = limit_period(mlvl_bboxes[..., 6] - self.dir_offset,
                               0, np.pi)
        mlvl_bboxes[..., 6] = (dir_rot + self.dir_offset +
                               np.pi * mlvl_dir_scores.to(mlvl_bboxes.dtype))

    cam_intrinsic = mlvl_centers2d.new_zeros((4, 4))
    cam_intrinsic[:view.shape[0], :view.shape[1]] = \
        mlvl_centers2d.new_tensor(view)
    mlvl_bboxes[:, 6] = torch.atan2(
        mlvl_centers2d[:, 0] - cam_intrinsic[0, 2],
        cam_intrinsic[0, 0]) + mlvl_bboxes[:, 6]
    mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](
        mlvl_bboxes, box_dim=self.bbox_code_size,
        origin=(0.5, 0.5, 0.5)).bev)

    mlvl_scores = torch.cat(mlvl_scores)
    padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
    # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
    # BG cat_id: num_class
    mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
    mlvl_attr_scores = torch.cat(mlvl_attr_scores)
    mlvl_centerness = torch.cat(mlvl_centerness)
    # no scale_factors in box3d_multiclass_nms
    # Then we multiply it from outside
    mlvl_nms_scores = mlvl_scores * mlvl_centerness[:, None]
    results = box3d_multiclass_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
                                   mlvl_nms_scores, cfg.score_thr,
                                   cfg.max_per_img, cfg, mlvl_dir_scores,
                                   mlvl_attr_scores)
    bboxes, scores, labels, dir_scores, attrs = results
    attrs = attrs.to(labels.dtype)  # change data type to int
    bboxes = input_meta['box_type_3d'](bboxes,
                                       box_dim=self.bbox_code_size,
                                       origin=(0.5, 0.5, 0.5))
    # Note that the predictions use origin (0.5, 0.5, 0.5) because the
    # ground truth centers2d are the gravity centers of objects.
    # v0.10.0 fixed an inplace operation on the input tensor of cam_box3d,
    # so here we also need to add origin=(0.5, 0.5, 0.5).
    if not self.pred_attrs:
        attrs = None

    return bboxes, scores, labels, attrs
def forward(self, output, label):
    return torch.mean(
        torch.atan2(torch.norm(torch.cross(output, label), dim=1),
                    torch.sum(output * label, dim=1)) * 180 / np.pi)
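# Illustrative check (added, not from the original source): this loss is the
# mean angle, in degrees, between corresponding 3-vectors; orthogonal unit
# vectors give 90.
import numpy as np
import torch

output = torch.tensor([[1.0, 0.0, 0.0]])
label = torch.tensor([[0.0, 1.0, 0.0]])
angle_deg = torch.atan2(torch.norm(torch.cross(output, label, dim=1), dim=1),
                        torch.sum(output * label, dim=1)) * 180 / np.pi
assert torch.allclose(angle_deg, torch.tensor([90.0]))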
def scale(self, scale_x: float, scale_y: float) -> None:
    """
    Scale the rotated box with horizontal and vertical scaling factors.
    Note: when scale_factor_x != scale_factor_y,
    the rotated box does not preserve the rectangular shape when the angle
    is not a multiple of 90 degrees under resize transformation.
    Instead, the shape is a parallelogram (that has skew).
    Here we make an approximation by fitting a rotated rectangle to the
    parallelogram.
    """
    self.tensor[:, 0] *= scale_x
    self.tensor[:, 1] *= scale_y
    theta = self.tensor[:, 4] * math.pi / 180.0
    c = torch.cos(theta)
    s = torch.sin(theta)

    # In image space, y is top->down and x is left->right.
    # Consider the local coordinate system for the rotated box,
    # where the box center is located at (0, 0), and the four vertices ABCD are
    # A(-w / 2, -h / 2), B(w / 2, -h / 2), C(w / 2, h / 2), D(-w / 2, h / 2).
    # The midpoint of the left edge AD of the rotated box E is:
    #   E = (A + D) / 2 = (-w / 2, 0)
    # The midpoint of the top edge AB of the rotated box F is:
    #   F = (0, -h / 2)
    # To get the old coordinates in the global system, apply the rotation
    # transformation
    # (Note: the right-handed coordinate system for image space is yOx):
    #   (old_x, old_y) = (s * y + c * x, c * y - s * x)
    #   E(old) = (s * 0 + c * (-w/2), c * 0 - s * (-w/2)) = (-c * w / 2, s * w / 2)
    #   F(old) = (s * (-h / 2) + c * 0, c * (-h / 2) - s * 0) = (-s * h / 2, -c * h / 2)
    # After applying the scaling factor (sfx, sfy):
    #   E(new) = (-sfx * c * w / 2, sfy * s * w / 2)
    #   F(new) = (-sfx * s * h / 2, -sfy * c * h / 2)
    # The new width after the scaling transformation becomes:
    #   w(new) = |E(new) - O| * 2
    #          = sqrt[(sfx * c * w / 2)^2 + (sfy * s * w / 2)^2] * 2
    #          = sqrt[(sfx * c)^2 + (sfy * s)^2] * w
    # i.e., scale_factor_w = sqrt[(sfx * c)^2 + (sfy * s)^2]
    #
    # For example,
    # when angle = 0 or 180, |c| = 1, s = 0, scale_factor_w == scale_factor_x;
    # when |angle| = 90, c = 0, |s| = 1, scale_factor_w == scale_factor_y
    self.tensor[:, 2] *= torch.sqrt((scale_x * c)**2 + (scale_y * s)**2)

    #   h(new) = |F(new) - O| * 2
    #          = sqrt[(sfx * s * h / 2)^2 + (sfy * c * h / 2)^2] * 2
    #          = sqrt[(sfx * s)^2 + (sfy * c)^2] * h
    # i.e., scale_factor_h = sqrt[(sfx * s)^2 + (sfy * c)^2]
    #
    # For example,
    # when angle = 0 or 180, |c| = 1, s = 0, scale_factor_h == scale_factor_y;
    # when |angle| = 90, c = 0, |s| = 1, scale_factor_h == scale_factor_x
    self.tensor[:, 3] *= torch.sqrt((scale_x * s)**2 + (scale_y * c)**2)

    # The angle is the rotation angle from the y-axis in image space to the
    # height vector (top->down in the box's local coordinate system) of the
    # box in CCW.
    #
    #   angle(new) = angle_yOx(O - F(new))
    #              = angle_yOx( (sfx * s * h / 2, sfy * c * h / 2) )
    #              = atan2(sfx * s * h / 2, sfy * c * h / 2)
    #              = atan2(sfx * s, sfy * c)
    #
    # For example,
    # when sfx == sfy, angle(new) == atan2(s, c) == angle(old)
    self.tensor[:, 4] = torch.atan2(scale_x * s, scale_y * c) * 180 / math.pi
def solve_3d_bbox_single(bbox2D, corners, theta_l, calib):
    """
    Input:
        bbox2D: Tensor(4), [x1, y1, x2, y2]
        corners: Tensor(8, 3), aligned corners without rotation
        theta_l: camera direction [-pi, pi]
        calib: calibration matrices in KITTI
    """
    x1, y1, x2, y2 = bbox2D
    # useful calibrations
    P2 = calib['P2']
    R0_rect = torch.eye(4)
    R0_rect[:3, :3] = calib['R0_rect']
    K = torch.matmul(P2, R0_rect)

    # use 2D bbox to estimate global rotation
    theta_ray = torch.atan2(P2[0, 0], (x1 + x2) * 0.5 - P2[0, 2])
    ry = np.pi - theta_ray - theta_l
    Ry_T = torch.tensor([[torch.cos(ry), 0.0, -torch.sin(ry)],
                         [0.0, 1.0, 0.0],
                         [torch.sin(ry), 0.0, torch.cos(ry)]])
    corners = torch.matmul(corners, Ry_T)  # rotated corners

    # adjust front side
    if theta_l >= np.pi / 2.0 and theta_l < np.pi:
        corners = corners[[3, 0, 1, 2, 7, 4, 5, 6]].contiguous()
    elif theta_l >= -np.pi and theta_l < -np.pi / 2.0:
        corners = corners[[2, 3, 0, 1, 6, 7, 4, 5]].contiguous()
    elif theta_l >= -np.pi / 2.0 and theta_l < 0.0:
        corners = corners[[1, 2, 3, 0, 5, 6, 7, 4]].contiguous()

    # start solving constraints
    X = torch.eye(4)
    A = torch.zeros(4, 3)
    b = torch.zeros(4)

    # prepare constraints
    constraints = {}
    # x1 -> 7, 6
    constraints['x1'] = {}
    for i in [7, 6]:
        constraints['x1'][i] = {}
        X[:3, 3] = corners[i]
        K_X = torch.matmul(K, X)
        constraints['x1'][i]['A'] = K_X[0, :3] - x1 * K_X[2, :3]
        constraints['x1'][i]['b'] = x1 * K_X[2, 3] - K_X[0, 3]
    # x2 -> 4, 7
    constraints['x2'] = {}
    for i in [4, 7]:
        constraints['x2'][i] = {}
        X[:3, 3] = corners[i]
        K_X = torch.matmul(K, X)
        constraints['x2'][i]['A'] = K_X[0, :3] - x2 * K_X[2, :3]
        constraints['x2'][i]['b'] = x2 * K_X[2, 3] - K_X[0, 3]
    # y1 -> 4, 5, 6, 7
    constraints['y1'] = {}
    for i in [4, 5, 6, 7]:
        constraints['y1'][i] = {}
        X[:3, 3] = corners[i]
        K_X = torch.matmul(K, X)
        constraints['y1'][i]['A'] = K_X[1, :3] - y1 * K_X[2, :3]
        constraints['y1'][i]['b'] = y1 * K_X[2, 3] - K_X[1, 3]
    # y2 -> 2, 3, 0
    constraints['y2'] = {}
    for i in [2, 3, 0]:
        constraints['y2'][i] = {}
        X[:3, 3] = corners[i]
        K_X = torch.matmul(K, X)
        constraints['y2'][i]['A'] = K_X[1, :3] - y2 * K_X[2, :3]
        constraints['y2'][i]['b'] = y2 * K_X[2, 3] - K_X[1, 3]

    # solving linear functions
    error = float('inf')
    # case 1: only the front side is visible
    A[0] = constraints['x1'][7]['A']
    b[0] = constraints['x1'][7]['b']
    A[1] = constraints['x2'][4]['A']
    b[1] = constraints['x2'][4]['b']
    for i in [3, 0]:
        A[2] = constraints['y2'][i]['A']
        b[2] = constraints['y2'][i]['b']
        for j in [4, 5, 6, 7]:
            A[3] = constraints['y1'][j]['A']
            b[3] = constraints['y1'][j]['b']
            trans_t = torch.matmul(torch.pinverse(A), b)
            error_t = torch.norm(torch.matmul(A, trans_t) - b)
            if error_t < error:
                trans = trans_t
                error = error_t
    # case 2: both the front side and a lateral side are visible
    A[0] = constraints['x1'][6]['A']
    b[0] = constraints['x1'][6]['b']
    for i in [2, 3, 0]:
        A[2] = constraints['y2'][i]['A']
        b[2] = constraints['y2'][i]['b']
        for j in [4, 5, 6, 7]:
            A[3] = constraints['y1'][j]['A']
            b[3] = constraints['y1'][j]['b']
            trans_t = torch.matmul(torch.pinverse(A), b)
            error_t = torch.norm(torch.matmul(A, trans_t) - b)
            if error_t < error:
                trans = trans_t
                error = error_t
    # case 3: only a lateral side is visible
    A[1] = constraints['x2'][7]['A']
    b[1] = constraints['x2'][7]['b']
    for i in [2, 3]:
        A[2] = constraints['y2'][i]['A']
        b[2] = constraints['y2'][i]['b']
        for j in [4, 5, 6, 7]:
            A[3] = constraints['y1'][j]['A']
            b[3] = constraints['y1'][j]['b']
            trans_t = torch.matmul(torch.pinverse(A), b)
            error_t = torch.norm(torch.matmul(A, trans_t) - b)
            if error_t < error:
                trans = trans_t
                error = error_t
    return trans
def R2euler(R):
    return stackify((torch.atan2(R[2, 1], R[2, 2]),
                     torch.atan2(-R[2, 0], torch.sqrt(R[0, 0]**2 + R[1, 0]**2)),
                     torch.atan2(R[1, 0], R[0, 0])))
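# Sanity sketch (added for illustration): for a rotation built as
# R = Rz @ Ry @ Rx from known angles, the extraction above recovers them.
# `stackify` is assumed elsewhere to stack scalars into one tensor.
import math
import torch

rx, ry, rz = 0.2, -0.4, 1.1
Rx = torch.tensor([[1, 0, 0],
                   [0, math.cos(rx), -math.sin(rx)],
                   [0, math.sin(rx), math.cos(rx)]])
Ry = torch.tensor([[math.cos(ry), 0, math.sin(ry)],
                   [0, 1, 0],
                   [-math.sin(ry), 0, math.cos(ry)]])
Rz = torch.tensor([[math.cos(rz), -math.sin(rz), 0],
                   [math.sin(rz), math.cos(rz), 0],
                   [0, 0, 1]])
R = Rz @ Ry @ Rx
# R2euler(R) should return approximately (rx, ry, rz) for this convention.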
def cubic_spline(
    inputs,
    unnormalized_widths,
    unnormalized_heights,
    unnorm_derivatives_left,
    unnorm_derivatives_right,
    inverse=False,
    left=0.0,
    right=1.0,
    bottom=0.0,
    top=1.0,
    min_bin_width=DEFAULT_MIN_BIN_WIDTH,
    min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
    eps=DEFAULT_EPS,
    quadratic_threshold=DEFAULT_QUADRATIC_THRESHOLD,
):
    """
    References:
    > Blinn, J. F. (2007). How to solve a cubic equation, part 5: Back to
      numerics. IEEE Computer Graphics and Applications, 27(3):78-89.
    """
    if torch.min(inputs) < left or torch.max(inputs) > right:
        raise InputOutsideDomain()

    num_bins = unnormalized_widths.shape[-1]
    if min_bin_width * num_bins > 1.0:
        raise ValueError("Minimal bin width too large for the number of bins")
    if min_bin_height * num_bins > 1.0:
        raise ValueError("Minimal bin height too large for the number of bins")

    if inverse:
        inputs = (inputs - bottom) / (top - bottom)
    else:
        inputs = (inputs - left) / (right - left)

    widths = F.softmax(unnormalized_widths, dim=-1)
    widths = min_bin_width + (1 - min_bin_width * num_bins) * widths
    cumwidths = torch.cumsum(widths, dim=-1)
    cumwidths[..., -1] = 1
    cumwidths = F.pad(cumwidths, pad=(1, 0), mode="constant", value=0.0)

    heights = F.softmax(unnormalized_heights, dim=-1)
    heights = min_bin_height + (1 - min_bin_height * num_bins) * heights
    cumheights = torch.cumsum(heights, dim=-1)
    cumheights[..., -1] = 1
    cumheights = F.pad(cumheights, pad=(1, 0), mode="constant", value=0.0)

    slopes = heights / widths
    min_something_1 = torch.min(torch.abs(slopes[..., :-1]),
                                torch.abs(slopes[..., 1:]))
    min_something_2 = (
        0.5 * (widths[..., 1:] * slopes[..., :-1]
               + widths[..., :-1] * slopes[..., 1:])
        / (widths[..., :-1] + widths[..., 1:]))
    min_something = torch.min(min_something_1, min_something_2)

    derivatives_left = (torch.sigmoid(unnorm_derivatives_left) * 3
                        * slopes[..., 0][..., None])
    derivatives_right = (torch.sigmoid(unnorm_derivatives_right) * 3
                         * slopes[..., -1][..., None])

    derivatives = min_something * (torch.sign(slopes[..., :-1])
                                   + torch.sign(slopes[..., 1:]))
    derivatives = torch.cat([derivatives_left, derivatives,
                             derivatives_right], dim=-1)

    a = (derivatives[..., :-1] + derivatives[..., 1:] - 2 * slopes) / widths.pow(2)
    b = (3 * slopes - 2 * derivatives[..., :-1] - derivatives[..., 1:]) / widths
    c = derivatives[..., :-1]
    d = cumheights[..., :-1]

    if inverse:
        bin_idx = torchutils.searchsorted(cumheights, inputs)[..., None]
    else:
        bin_idx = torchutils.searchsorted(cumwidths, inputs)[..., None]

    inputs_a = a.gather(-1, bin_idx)[..., 0]
    inputs_b = b.gather(-1, bin_idx)[..., 0]
    inputs_c = c.gather(-1, bin_idx)[..., 0]
    inputs_d = d.gather(-1, bin_idx)[..., 0]

    input_left_cumwidths = cumwidths.gather(-1, bin_idx)[..., 0]
    input_right_cumwidths = cumwidths.gather(-1, bin_idx + 1)[..., 0]

    if inverse:
        # Modified coefficients for solving the cubic.
        inputs_b_ = (inputs_b / inputs_a) / 3.0
        inputs_c_ = (inputs_c / inputs_a) / 3.0
        inputs_d_ = (inputs_d - inputs) / inputs_a

        delta_1 = -inputs_b_.pow(2) + inputs_c_
        delta_2 = -inputs_c_ * inputs_b_ + inputs_d_
        delta_3 = inputs_b_ * inputs_d_ - inputs_c_.pow(2)

        discriminant = 4.0 * delta_1 * delta_3 - delta_2.pow(2)

        depressed_1 = -2.0 * inputs_b_ * delta_1 + delta_2
        depressed_2 = delta_1

        three_roots_mask = (
            discriminant >= 0
        )  # Discriminant == 0 might be a problem in practice.
        one_root_mask = discriminant < 0

        outputs = torch.zeros_like(inputs)

        # Deal with one-root cases.
        p = torchutils.cbrt((-depressed_1[one_root_mask]
                             + torch.sqrt(-discriminant[one_root_mask])) / 2.0)
        q = torchutils.cbrt((-depressed_1[one_root_mask]
                             - torch.sqrt(-discriminant[one_root_mask])) / 2.0)

        outputs[one_root_mask] = ((p + q)
                                  - inputs_b_[one_root_mask]
                                  + input_left_cumwidths[one_root_mask])

        # Deal with three-root cases.
        theta = torch.atan2(torch.sqrt(discriminant[three_roots_mask]),
                            -depressed_1[three_roots_mask])
        theta /= 3.0

        cubic_root_1 = torch.cos(theta)
        cubic_root_2 = torch.sin(theta)

        root_1 = cubic_root_1
        root_2 = -0.5 * cubic_root_1 - 0.5 * math.sqrt(3) * cubic_root_2
        root_3 = -0.5 * cubic_root_1 + 0.5 * math.sqrt(3) * cubic_root_2

        root_scale = 2 * torch.sqrt(-depressed_2[three_roots_mask])
        root_shift = (-inputs_b_[three_roots_mask]
                      + input_left_cumwidths[three_roots_mask])

        root_1 = root_1 * root_scale + root_shift
        root_2 = root_2 * root_scale + root_shift
        root_3 = root_3 * root_scale + root_shift

        root1_mask = ((input_left_cumwidths[three_roots_mask] - eps) < root_1).float()
        root1_mask *= (root_1 < (input_right_cumwidths[three_roots_mask] + eps)).float()

        root2_mask = ((input_left_cumwidths[three_roots_mask] - eps) < root_2).float()
        root2_mask *= (root_2 < (input_right_cumwidths[three_roots_mask] + eps)).float()

        root3_mask = ((input_left_cumwidths[three_roots_mask] - eps) < root_3).float()
        root3_mask *= (root_3 < (input_right_cumwidths[three_roots_mask] + eps)).float()

        roots = torch.stack([root_1, root_2, root_3], dim=-1)
        masks = torch.stack([root1_mask, root2_mask, root3_mask], dim=-1)
        mask_index = torch.argsort(masks, dim=-1, descending=True)[..., 0][..., None]
        outputs[three_roots_mask] = torch.gather(roots, dim=-1,
                                                 index=mask_index).view(-1)

        # Deal with a -> 0 (almost quadratic) cases.
        quadratic_mask = inputs_a.abs() < quadratic_threshold
        a = inputs_b[quadratic_mask]
        b = inputs_c[quadratic_mask]
        c = inputs_d[quadratic_mask] - inputs[quadratic_mask]
        alpha = (-b + torch.sqrt(b.pow(2) - 4 * a * c)) / (2 * a)
        outputs[quadratic_mask] = alpha + input_left_cumwidths[quadratic_mask]

        shifted_outputs = outputs - input_left_cumwidths
        logabsdet = -torch.log(3 * inputs_a * shifted_outputs.pow(2)
                               + 2 * inputs_b * shifted_outputs
                               + inputs_c)
    else:
        shifted_inputs = inputs - input_left_cumwidths
        outputs = (inputs_a * shifted_inputs.pow(3)
                   + inputs_b * shifted_inputs.pow(2)
                   + inputs_c * shifted_inputs
                   + inputs_d)

        logabsdet = torch.log(3 * inputs_a * shifted_inputs.pow(2)
                              + 2 * inputs_b * shifted_inputs
                              + inputs_c)

    if inverse:
        outputs = outputs * (right - left) + left
    else:
        outputs = outputs * (top - bottom) + bottom

    return outputs, logabsdet
def mag_phase(complex_tensor):
    mag = (complex_tensor.pow(2.0).sum(-1) + 1e-8).pow(0.5)
    phase = torch.atan2(complex_tensor[..., 1], complex_tensor[..., 0])
    return mag, phase
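# Usage sketch (added for illustration): `mag_phase` operates on the
# real-valued complex layout where the last dimension holds (real, imag),
# e.g. the output of torch.stft(..., return_complex=False). The shapes below
# are arbitrary placeholders.
import torch

spec = torch.randn(2, 257, 100, 2)  # (batch, freq, frames, real/imag)
mag, phase = mag_phase(spec)
# mag and phase both have shape (2, 257, 100); the 1e-8 term keeps the
# magnitude gradient finite at all-zero bins.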
def phase(x):
    phase = torch.atan2(x[..., 0], x[..., 1])
    return phase
def rotate(self, state):
    """
    Transform the coordinates to be agent-centric.
    The input tuple contains a robot state tensor and a human state tensor.
    The robot state tensor has size (batch_size, number, state_length) (for example 100*1*9).
    The human state tensor has size (batch_size, number, state_length) (for example 100*5*5).
    """
    # for robot
    # 'px', 'py', 'vx', 'vy', 'radius', 'gx', 'gy', 'v_pref', 'theta'
    #  0     1     2     3     4         5     6     7         8
    # for human
    # 'px', 'py', 'vx', 'vy', 'radius'
    #  0     1     2     3     4
    assert len(state[0].shape) == 3
    if len(state[1].shape) == 3:
        batch = state[0].shape[0]
        robot_state = state[0]
        human_state = state[1]
        human_num = state[1].shape[1]
        dx = robot_state[:, :, 5] - robot_state[:, :, 0]
        dy = robot_state[:, :, 6] - robot_state[:, :, 1]
        dx = dx.unsqueeze(1)
        dy = dy.unsqueeze(1)
        dg = torch.norm(torch.cat([dx, dy], dim=2), 2, dim=2, keepdim=True)
        rot = torch.atan2(dy, dx)
        cos_rot = torch.cos(rot)
        sin_rot = torch.sin(rot)
        transform_matrix = torch.cat((cos_rot, -sin_rot, sin_rot, cos_rot),
                                     dim=1).reshape(batch, 2, 2)
        robot_velocities = torch.bmm(robot_state[:, :, 2:4], transform_matrix)
        radius_r = robot_state[:, :, 4].unsqueeze(1)
        v_pref = robot_state[:, :, 7].unsqueeze(1)
        target_heading = torch.zeros_like(radius_r)
        pos_r = torch.zeros_like(robot_velocities)
        cur_heading = (robot_state[:, :, 8].unsqueeze(1) - rot + np.pi) % (2 * np.pi) - np.pi
        new_robot_state = torch.cat((pos_r, robot_velocities, radius_r, dg,
                                     target_heading, v_pref, cur_heading), dim=2)
        human_positions = human_state[:, :, 0:2] - robot_state[:, :, 0:2]
        human_positions = torch.bmm(human_positions, transform_matrix)
        human_velocities = human_state[:, :, 2:4]
        human_velocities = torch.bmm(human_velocities, transform_matrix)
        human_radius = human_state[:, :, 4].unsqueeze(2) + 0.3
        new_human_state = torch.cat((human_positions, human_velocities,
                                     human_radius), dim=2)
        new_state = (new_robot_state, new_human_state)
        return new_state
    else:
        batch = state[0].shape[0]
        robot_state = state[0]
        dx = robot_state[:, :, 5] - robot_state[:, :, 0]
        dy = robot_state[:, :, 6] - robot_state[:, :, 1]
        dx = dx.unsqueeze(1)
        dy = dy.unsqueeze(1)
        radius_r = robot_state[:, :, 4].unsqueeze(1)
        dg = torch.norm(torch.cat([dx, dy], dim=2), 2, dim=2, keepdim=True)
        rot = torch.atan2(dy, dx)
        cos_rot = torch.cos(rot)
        sin_rot = torch.sin(rot)
        vx = (robot_state[:, :, 2].unsqueeze(1) * cos_rot
              + robot_state[:, :, 3].unsqueeze(1) * sin_rot).reshape((batch, 1, -1))
        vy = (robot_state[:, :, 3].unsqueeze(1) * cos_rot
              - robot_state[:, :, 2].unsqueeze(1) * sin_rot).reshape((batch, 1, -1))
        v_pref = robot_state[:, :, 7].unsqueeze(1)
        theta = robot_state[:, :, 8].unsqueeze(1)
        px_r = torch.zeros_like(v_pref)
        py_r = torch.zeros_like(v_pref)
        new_robot_state = torch.cat((px_r, py_r, vx, vy, radius_r, dg, rot,
                                     v_pref, theta), dim=2)
        new_state = (new_robot_state, None)
        return new_state
def get_angle(v1, v2):
    return torch.atan2(
        torch.cross(v1, v2, dim=1).norm(p=2, dim=1), (v1 * v2).sum(dim=1))
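# Illustrative note (added): atan2(|v1 x v2|, v1 . v2) is numerically better
# behaved than acos of the normalized dot product for angles near 0 and pi.
import torch

v1 = torch.tensor([[1.0, 0.0, 0.0]])
v2 = torch.tensor([[1.0, 1e-4, 0.0]])
# get_angle stays accurate for nearly parallel vectors, where
# acos(cosine_similarity) loses precision.
small_angle = get_angle(v1, v2)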
def _homography_joint_svd(
    self,
    top_corners: torch.Tensor,  # in [-1, 1]
    bottom_corners: torch.Tensor,  # in [-1, 1]
    floor_z: float = -1.6,
    ceil_z: float = 1.6,
):
    b, N, _ = top_corners.size()
    floor_u = bottom_corners[:, :, 0] * np.pi
    floor_v = bottom_corners[:, :, 1] * (-0.5 * np.pi)
    floor_c = floor_z / torch.tan(floor_v)
    floor_x = floor_c * torch.sin(floor_u)
    floor_y = -floor_c * torch.cos(floor_u)
    floor_xy = torch.stack([floor_x, floor_y], dim=-1)
    floor_scale = self._get_scale_all(floor_xy)
    floor_scale = floor_scale / 2.0

    floor_ceil_c = torch.linalg.norm(floor_xy, ord=2, dim=-1)
    floor_ceil_v = top_corners[:, :, 1] * (-0.5 * np.pi)
    floor_ceil_z = (floor_ceil_c * torch.tan(floor_ceil_v)).mean(dim=1, keepdim=True)
    floor_ceil_z = floor_ceil_z.unsqueeze(1).expand(b, 4, 1).contiguous()

    ceil_u_t = top_corners[:, :, 0] * np.pi
    ceil_v_t = top_corners[:, :, 1] * (-0.5 * np.pi)
    ceil_c = ceil_z / torch.tan(ceil_v_t)
    ceil_x = ceil_c * torch.sin(ceil_u_t)
    ceil_y = -ceil_c * torch.cos(ceil_u_t)
    ceil_xy = torch.stack([ceil_x, ceil_y], dim=-1)

    ceil_floor_c = torch.linalg.norm(ceil_xy, ord=2, dim=-1)
    ceil_v_b = bottom_corners[:, :, 1] * (-0.5 * np.pi)
    ceil_floor_z = (ceil_floor_c * torch.tan(ceil_v_b)).mean(dim=1, keepdim=True)
    fix_ceil = -ceil_z / ceil_floor_z
    ceil_z_fix = ceil_z * fix_ceil
    ceil_z_fix = ceil_z_fix.unsqueeze(1).expand(b, 4, 1).contiguous()

    ceil_floor_fixed_c = ceil_z_fix.squeeze(-1) / torch.tan(ceil_v_t)
    ceil_x = ceil_floor_fixed_c * torch.sin(ceil_u_t)
    ceil_y = -ceil_floor_fixed_c * torch.cos(ceil_u_t)
    ceil_xy = torch.stack([ceil_x, ceil_y], dim=-1)
    ceil_scale = self._get_scale_all(ceil_xy)
    ceil_scale = ceil_scale / 2.0

    joint_xy = 0.5 * (floor_xy + ceil_xy)
    joint_scale = 0.5 * (floor_scale + ceil_scale)
    joint_centroid = joint_xy.mean(dim=1)
    joint_xy = joint_xy - joint_centroid.unsqueeze(1)

    inds = torch.sort(torch.atan2(joint_xy[..., 0], joint_xy[..., 1] + 1e-12))[1]
    axes = self.cuboid_axes[:, inds.squeeze(), :]
    homography = kornia.get_perspective_transform(joint_xy, axes)
    homogeneous = torch.cat([joint_xy, torch.ones_like(joint_xy[..., -1:])], dim=2)
    xformed = (homography @ homogeneous.transpose(1, 2)).transpose(1, 2)
    xformed = xformed[:, :, :2] / xformed[:, :, 2].unsqueeze(-1)
    rect_joint_xy = xformed * joint_scale.unsqueeze(1) + joint_centroid.unsqueeze(1)

    original_xy = joint_xy + joint_centroid.unsqueeze(1)
    R, t, s = self._svd(rect_joint_xy, original_xy[:, inds.squeeze(), :])
    rect_joint_xy = self._transform_points(rect_joint_xy, R, t, s)

    bottom_points = torch.cat(
        [rect_joint_xy, floor_z * torch.ones_like(floor_c.unsqueeze(-1))], dim=-1)
    top_points = torch.cat([rect_joint_xy, ceil_z_fix], dim=-1)
    return top_points, bottom_points
def extract_ampl_phase(fft_im):
    # fft_im: size should be bx3xhxwx2
    fft_amp = fft_im[:, :, :, :, 0]**2 + fft_im[:, :, :, :, 1]**2
    fft_amp = torch.sqrt(fft_amp)
    fft_pha = torch.atan2(fft_im[:, :, :, :, 1], fft_im[:, :, :, :, 0])
    return fft_amp, fft_pha
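# Usage sketch (added for illustration): amplitude and phase suffice to
# rebuild the (real, imag) spectrum, which is the basis of Fourier-domain
# style transfer methods that swap amplitudes between images.
import torch

fft_im = torch.randn(1, 3, 8, 8, 2)
amp, pha = extract_ampl_phase(fft_im)
recon = torch.stack([amp * torch.cos(pha), amp * torch.sin(pha)], dim=-1)
assert torch.allclose(fft_im, recon, atol=1e-5)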
def xyz2uv(xy, z=-1):
    c = torch.sqrt((xy**2).sum(1))
    u = torch.atan2(xy[:, 1], xy[:, 0]).view(-1, 1)
    v = torch.atan2(torch.zeros_like(c) + z, c).view(-1, 1)
    return torch.cat([u, v], dim=1)
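# Quick check (added for illustration): xyz2uv maps a planar point at a fixed
# height to panorama (u, v) angles.
import torch

xy = torch.tensor([[1.0, 0.0]])
uv = xyz2uv(xy, z=-1)
# u = atan2(0, 1) = 0 and v = atan2(-1, 1) = -pi/4 for a point one unit out
# along the x axis with the plane one unit below the camera.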
def getCouzinModelDir_vector_torch(diff, angles, r_r, r_s, viewingzone):
    # This is a partially optimized way to calculate the desired direction.
    # It is "partially optimized" because some of the things below could
    # probably be better: the baseline performance is about the same as the
    # naive CPU implementation. However, the scaling with a larger number of
    # neighbors is much better. Adding a larger social zone (and therefore
    # more neighbors) does not change the running time, whereas it does
    # significantly for the regular version.
    ntorch = torch.from_numpy(diff)
    viewtest = torch.from_numpy(np.cos([viewingzone]))
    anglestorch = torch.from_numpy(angles[:, None])
    if viewingzone < np.pi:
        viewneighbors = torch.cos(
            anglestorch - torch.atan2(ntorch[:, :, 1], ntorch[:, :, 0])) > viewtest
    else:
        viewneighbors = torch.ones(diff.shape[0:2])
    viewneighbors = viewneighbors.type(torch.uint8)

    repzone = viewneighbors & (ntorch[:, :, 2] <= r_r) & (ntorch[:, :, 2] > 0)
    socialzone = viewneighbors & (np.logical_not(repzone)) & \
        (ntorch[:, :, 2] <= r_s) & (ntorch[:, :, 2] > 0)
    userep = torch.sum(repzone, 1) > 0
    usesocial = np.logical_not(userep) & (torch.sum(socialzone, 1) > 0)

    repzonedouble = repzone.type(torch.DoubleTensor)
    socialzonedouble = socialzone.type(torch.DoubleTensor)

    xrep = ntorch[:, :, 0] * repzonedouble
    yrep = ntorch[:, :, 1] * repzonedouble
    distrep = ntorch[:, :, 2][repzone]
    xrep[repzone] = xrep[repzone] / distrep
    yrep[repzone] = yrep[repzone] / distrep
    rx = -torch.sum(xrep, 1)
    ry = -torch.sum(yrep, 1)
    rx = rx[userep]
    ry = ry[userep]
    rnorm = torch.sqrt(rx**2 + ry**2)
    rx, ry = rx / rnorm, ry / rnorm

    xsoc = ntorch[:, :, 0] * socialzonedouble
    ysoc = ntorch[:, :, 1] * socialzonedouble
    distsoc = ntorch[:, :, 2][socialzone]
    xsoc[socialzone] = xsoc[socialzone] / distsoc
    ysoc[socialzone] = ysoc[socialzone] / distsoc
    ax, ay = torch.sum(xsoc, 1), torch.sum(ysoc, 1)
    ax = ax[usesocial]
    ay = ay[usesocial]

    vxsoc = ntorch[:, :, 3] * socialzonedouble
    vysoc = ntorch[:, :, 4] * socialzonedouble
    vnorm = torch.sqrt(vxsoc[socialzone]**2 + vysoc[socialzone]**2)
    vxsoc[socialzone] = vxsoc[socialzone] / vnorm
    vysoc[socialzone] = vysoc[socialzone] / vnorm
    ox, oy = torch.sum(vxsoc, 1), torch.sum(vysoc, 1)
    ox = ox[usesocial]
    oy = oy[usesocial]

    sx, sy = ax + ox, ay + oy
    snorm = torch.sqrt(sx**2 + sy**2)
    sx = sx / snorm
    sy = sy / snorm

    newdirs = torch.zeros((len(diff), 2))
    newdirs = newdirs.type(torch.DoubleTensor)
    newdirs[userep, 0] = rx
    newdirs[userep, 1] = ry
    newdirs[usesocial, 0] = sx
    newdirs[usesocial, 1] = sy
    return newdirs.data.numpy()
def _predict(self):
    # This is just _build_network in tf-faster-rcnn
    torch.backends.cudnn.benchmark = False
    net_conv = self._image_to_head()

    # build the anchors for the image
    self._anchor_component(net_conv.size(2), net_conv.size(3))

    rois = self._region_proposal(net_conv)
    if cfg.POOLING_MODE == 'align':
        pool5 = self._roi_align_layer(net_conv, rois)
    else:
        pool5 = self._roi_pool_layer(net_conv, rois)

    if self._mode == 'TRAIN':
        # benchmark because the input sizes are now fixed
        torch.backends.cudnn.benchmark = True

    fc7 = self._head_to_tail(pool5)
    cls_prob, bbox_pred = self._region_classification(fc7)

    num_rois = rois.shape[0]
    z = self.relation_fc_1(fc7)
    z = F.relu(self.relation_fc_2(z))
    eps = torch.mm(z, z.t())
    _, indices = torch.topk(eps, k=32, dim=0)
    cls_w = self.cls_score_net.weight
    represent = torch.mm(cls_prob, cls_w)
    cls_pred = torch.max(cls_prob, 1)[1]

    bbox_pred_reshape = bbox_pred.view(-1, 1001, 4)
    bbox_pred_cls = torch.zeros(num_rois, 4)
    for i, cls in enumerate(cls_pred):
        bbox_pred_cls[i] = bbox_pred_reshape[i][cls]
    bbox_pred_ctr = bbox_pred_cls[:, 0:2] + bbox_pred_cls[:, 2:4]

    relation = torch.empty(2, 32 * num_rois, dtype=torch.long).to(self._device)
    relation[0] = torch.Tensor(list(range(num_rois)) * 32)
    relation[1] = indices.view(-1)

    coord_i = bbox_pred_ctr[relation[0]]
    coord_j = bbox_pred_ctr[relation[1]]
    d = torch.sqrt((coord_i[:, 0] - coord_j[:, 0])**2
                   + (coord_i[:, 1] - coord_j[:, 1])**2)
    theta = torch.atan2((coord_j[:, 1] - coord_i[:, 1]),
                        (coord_j[:, 0] - coord_i[:, 0]))
    U = torch.stack([d, theta], dim=1).to(self._device)

    f = self.gaussian(represent, relation, U)
    f2 = F.relu(self.sg_conv_1(f))
    h = F.relu(self.sg_conv_2(f2))
    new_f = torch.cat([fc7, h], dim=1)
    new_cls_prob, new_bbox_pred = self._new_region_classification(new_f)

    for k in self._predictions.keys():
        self._score_summaries[k] = self._predictions[k]

    return rois, new_cls_prob, new_bbox_pred
def single_step_euler(ode_params, x_curr, y_curr, z_curr, t_curr,
                      input_params, device_name):
    h = ode_params.h
    A = ode_params.A
    f2 = ode_params.f2
    rrpc = ode_params.rrpc.float()

    a_p = input_params[0]
    a_q = input_params[3]
    a_r = input_params[6]
    a_s = input_params[9]
    a_t = input_params[12]
    b_p = input_params[1]
    b_q = input_params[4]
    b_r = input_params[7]
    b_s = input_params[10]
    b_t = input_params[13]
    theta_p = input_params[2]
    theta_q = input_params[5]
    theta_r = input_params[8]
    theta_s = input_params[11]
    theta_t = input_params[14]

    alpha = 1 - (x_curr * x_curr + y_curr * y_curr)**0.5
    cast = (t_curr / h).type(torch.IntTensor)
    tensor_temp = 1 + cast
    tensor_temp = tensor_temp % len(rrpc)

    if rrpc[tensor_temp] == 0:
        print("***inside zero***")
        omega = (2.0 * math.pi / 1e-3)
    else:
        omega = (2.0 * math.pi / rrpc[tensor_temp]).to(device_name)

    d_x_d_t_next = alpha * x_curr - omega * y_curr
    d_y_d_t_next = alpha * y_curr + omega * x_curr

    theta = torch.atan2(y_curr, x_curr)
    delta_theta_p = torch.fmod(theta - theta_p, 2 * math.pi)
    delta_theta_q = torch.fmod(theta - theta_q, 2 * math.pi)
    delta_theta_r = torch.fmod(theta - theta_r, 2 * math.pi)
    delta_theta_s = torch.fmod(theta - theta_s, 2 * math.pi)
    delta_theta_t = torch.fmod(theta - theta_t, 2 * math.pi)

    z_p = a_p * delta_theta_p * \
        torch.exp(-delta_theta_p * delta_theta_p / (2 * b_p * b_p))
    z_q = a_q * delta_theta_q * \
        torch.exp(-delta_theta_q * delta_theta_q / (2 * b_q * b_q))
    z_r = a_r * delta_theta_r * \
        torch.exp(-delta_theta_r * delta_theta_r / (2 * b_r * b_r))
    z_s = a_s * delta_theta_s * \
        torch.exp(-delta_theta_s * delta_theta_s / (2 * b_s * b_s))
    z_t = a_t * delta_theta_t * \
        torch.exp(-delta_theta_t * delta_theta_t / (2 * b_t * b_t))

    z_0_t = (A * torch.sin(torch.tensor(2 * math.pi).to(device_name)
                           * f2 * t_curr).to(device_name)).to(device_name)
    d_z_d_t_next = -1 * (z_p + z_q + z_r + z_s + z_t) - (z_curr - z_0_t)

    k1_x = h * d_x_d_t_next
    k1_y = h * d_y_d_t_next
    k1_z = h * d_z_d_t_next

    # Calculate the next step:
    x_next = x_curr + k1_x
    y_next = y_curr + k1_y
    z_next = z_curr + k1_z
    return x_next, y_next, z_next
def predict(self, example, preds_dicts, test_cfg, **kwargs):
    """Decode, nms, then return the detection result.
    Additionally supports double-flip testing.
    """
    # get loss info
    rets = []
    metas = []

    double_flip = test_cfg.get('double_flip', False)

    post_center_range = test_cfg.post_center_limit_range
    if len(post_center_range) > 0:
        post_center_range = torch.tensor(
            post_center_range,
            dtype=preds_dicts[0]['hm'].dtype,
            device=preds_dicts[0]['hm'].device,
        )

    for task_id, preds_dict in enumerate(preds_dicts):
        # convert N C H W to N H W C
        for key, val in preds_dict.items():
            preds_dict[key] = val.permute(0, 2, 3, 1).contiguous()

        batch_size = preds_dict['hm'].shape[0]

        if double_flip:
            assert batch_size % 4 == 0, print(batch_size)
            batch_size = int(batch_size / 4)
            for k in preds_dict.keys():
                # Transform the prediction maps back to their original
                # coordinates before flipping. The flipped predictions are
                # ordered in groups of 4: the first one is the original point
                # cloud, the second is the X-flipped point cloud (y = -y),
                # the third is the Y-flipped point cloud (x = -x), and the
                # last is the X- and Y-flipped point cloud (x = -x, y = -y).
                # Also note that pytorch's flip function is defined on a
                # higher-dimensional space, so dims=[2] means flipping along
                # the axis with length H (normally the Y axis); in
                # conventional terms, however, that is a flip along the X
                # axis. The flips below follow pytorch's definition:
                # yflip (y = -y), xflip (x = -x).
                _, H, W, C = preds_dict[k].shape
                preds_dict[k] = preds_dict[k].reshape(int(batch_size), 4, H, W, C)
                preds_dict[k][:, 1] = torch.flip(preds_dict[k][:, 1], dims=[1])
                preds_dict[k][:, 2] = torch.flip(preds_dict[k][:, 2], dims=[2])
                preds_dict[k][:, 3] = torch.flip(preds_dict[k][:, 3], dims=[1, 2])

        if "metadata" not in example or len(example["metadata"]) == 0:
            meta_list = [None] * batch_size
        else:
            meta_list = example["metadata"]
            if double_flip:
                meta_list = meta_list[:4 * int(batch_size):4]

        batch_hm = torch.sigmoid(preds_dict['hm'])
        batch_dim = torch.exp(preds_dict['dim'])

        batch_rots = preds_dict['rot'][..., 0:1]
        batch_rotc = preds_dict['rot'][..., 1:2]
        batch_reg = preds_dict['reg']
        batch_hei = preds_dict['height']

        if double_flip:
            batch_hm = batch_hm.mean(dim=1)
            batch_hei = batch_hei.mean(dim=1)
            batch_dim = batch_dim.mean(dim=1)

            # y = -y  =>  reg_y = 1 - reg_y
            batch_reg[:, 1, ..., 1] = 1 - batch_reg[:, 1, ..., 1]
            batch_reg[:, 2, ..., 0] = 1 - batch_reg[:, 2, ..., 0]
            batch_reg[:, 3, ..., 0] = 1 - batch_reg[:, 3, ..., 0]
            batch_reg[:, 3, ..., 1] = 1 - batch_reg[:, 3, ..., 1]
            batch_reg = batch_reg.mean(dim=1)

            # first yflip: y = -y, theta = pi - theta
            # sin(pi - theta) = sin(theta), cos(pi - theta) = -cos(theta)
            # batch_rots[:, 1] stays the same
            batch_rotc[:, 1] *= -1

            # then xflip: x = -x, theta = 2pi - theta
            # sin(2pi - theta) = -sin(theta), cos(2pi - theta) = cos(theta)
            # batch_rotc[:, 2] stays the same
            batch_rots[:, 2] *= -1

            # double flip
            batch_rots[:, 3] *= -1
            batch_rotc[:, 3] *= -1

            batch_rotc = batch_rotc.mean(dim=1)
            batch_rots = batch_rots.mean(dim=1)

        batch_rot = torch.atan2(batch_rots, batch_rotc)

        batch, H, W, num_cls = batch_hm.size()

        batch_reg = batch_reg.reshape(batch, H * W, 2)
        batch_hei = batch_hei.reshape(batch, H * W, 1)
        batch_rot = batch_rot.reshape(batch, H * W, 1)
        batch_dim = batch_dim.reshape(batch, H * W, 3)
        batch_hm = batch_hm.reshape(batch, H * W, num_cls)

        ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)])
        ys = ys.view(1, H, W).repeat(batch, 1, 1).to(batch_hm)
        xs = xs.view(1, H, W).repeat(batch, 1, 1).to(batch_hm)

        xs = xs.view(batch, -1, 1) + batch_reg[:, :, 0:1]
        ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2]

        xs = xs * test_cfg.out_size_factor * test_cfg.voxel_size[0] + test_cfg.pc_range[0]
        ys = ys * test_cfg.out_size_factor * test_cfg.voxel_size[1] + test_cfg.pc_range[1]

        if 'vel' in preds_dict:
            batch_vel = preds_dict['vel']
            if double_flip:
                # flip vy
                batch_vel[:, 1, ..., 1] *= -1
                # flip vx
                batch_vel[:, 2, ..., 0] *= -1
                batch_vel[:, 3] *= -1
                batch_vel = batch_vel.mean(dim=1)
            batch_vel = batch_vel.reshape(batch, H * W, 2)
            batch_box_preds = torch.cat(
                [xs, ys, batch_hei, batch_dim, batch_vel, batch_rot], dim=2)
        else:
            batch_box_preds = torch.cat(
                [xs, ys, batch_hei, batch_dim, batch_rot], dim=2)

        metas.append(meta_list)

        if test_cfg.get('per_class_nms', False):
            pass
        else:
            rets.append(self.post_processing(batch_box_preds, batch_hm,
                                             test_cfg, post_center_range,
                                             task_id))

    # Merge branch results
    num_samples = len(rets[0])
    ret_list = []
    for i in range(num_samples):
        ret = {}
        for k in rets[0][i].keys():
            if k in ["box3d_lidar", "scores"]:
                ret[k] = torch.cat([r[i][k] for r in rets])
            elif k in ["label_preds"]:
                flag = 0
                for j, num_class in enumerate(self.num_classes):
                    rets[j][i][k] += flag
                    flag += num_class
                ret[k] = torch.cat([r[i][k] for r in rets])
        ret['metadata'] = metas[0][i]
        ret_list.append(ret)

    return ret_list
if not (configs.mosaic and configs.show_train_data):
    img_file = img_files[0]
    img_rgb = cv2.imread(img_file)
    calib = kitti_data_utils.Calibration(
        img_file.replace(".png", ".txt").replace("image_2", "calib"))
    objects_pred = invert_target(targets[:, 1:], calib, img_rgb.shape, RGB_Map=None)
    img_rgb = show_image_with_boxes(img_rgb, objects_pred, calib, False)

# Rescale target
targets[:, 2:6] *= configs.img_size
# Get yaw angle
targets[:, 6] = torch.atan2(targets[:, 6], targets[:, 7])

img_bev = imgs.squeeze() * 255
img_bev = img_bev.permute(1, 2, 0).numpy().astype(np.uint8)
img_bev = cv2.resize(img_bev, (configs.img_size, configs.img_size))
for c, x, y, w, l, yaw in targets[:, 1:7].numpy():
    # Draw rotated box
    bev_utils.drawRotatedBox(img_bev, x, y, w, l, yaw, cnf.colors[int(c)])
img_bev = cv2.flip(cv2.flip(img_bev, 0), 1)

if configs.mosaic and configs.show_train_data:
    cv2.imshow('mosaic_sample', img_bev)
else:
def main():
    # model select
    print('Model initializing\n')
    net = torch.nn.DataParallel(AttentionModel(
        257,
        hidden_size=args.hidden_size,
        dropout_p=args.dropout_p,
        use_attn=args.attn_use,
        stacked_encoder=args.stacked_encoder,
        attn_len=args.attn_len))

    # checkpoint load
    print('Trying Checkpoint Load\n')
    best_PESQ = 0.
    best_STOI = 0.
    ckpt_path = args.ckpt_path
    if os.path.exists(ckpt_path):
        ckpt = torch.load(ckpt_path)
        try:
            net.load_state_dict(ckpt['model'])
            net = net.module  # unwrap DataParallel
            best_STOI = ckpt['best_STOI']
            print('checkpoint is loaded !')
            print('current best loss : %.4f' % best_STOI)
        except RuntimeError as e:
            print('wrong checkpoint\n')
    else:
        print('checkpoint does not exist!')
        print('current best loss : %.4f' % best_STOI)

    # test phase
    net.eval()
    with torch.no_grad():
        inputData, sr = librosa.load(args.noisy_wav, sr=None)
        outputData, sr = librosa.load(args.clean_wav, sr=None)
        inputData = np.float32(inputData)
        outputData = np.float32(outputData)
        mixed_audio = torch.from_numpy(inputData).type(torch.FloatTensor)
        clean_audio = torch.from_numpy(outputData).type(torch.FloatTensor)

        mixed = stft(mixed_audio)
        mixed = mixed.unsqueeze(0)
        mixed = mixed.transpose(1, 2)
        cleaned = stft(clean_audio)
        cleaned = cleaned.unsqueeze(0)
        cleaned = cleaned.transpose(1, 2)

        real, imag = mixed[..., 0], mixed[..., 1]
        clean_real, clean_imag = cleaned[..., 0], cleaned[..., 1]
        mag = torch.sqrt(real**2 + imag**2)
        clean_mag = torch.sqrt(clean_real**2 + clean_imag**2)
        phase = torch.atan2(imag, real)

        logits_mag, logits_attn_weight = net(mag)
        logits_real = logits_mag * torch.cos(phase)
        logits_imag = logits_mag * torch.sin(phase)
        logits_real, logits_imag = torch.squeeze(logits_real, 1), torch.squeeze(logits_imag, 1)
        logits_real = logits_real.transpose(1, 2)
        logits_imag = logits_imag.transpose(1, 2)

        logits_audio = istft(logits_real, logits_imag, inputData.shape[0])
        logits_audio = torch.squeeze(logits_audio, dim=1)
        print(logits_audio[0])
        librosa.output.write_wav('./out.wav', logits_audio[0].cpu().data.numpy(), 16000)

        test_loss = F.mse_loss(logits_mag, clean_mag, True)
        test_PESQ = pesq(outputData, logits_audio[0].detach().cpu().numpy(), 16000)
        test_STOI = stoi(outputData, logits_audio[0].detach().cpu().numpy(), 16000, extended=False)

        print("Saved attention weight visualization to attention_viz.png")
        utils.plot_head_map(logits_attn_weight[0])

        # FIXME - Issue with pcm_f32le. Requires pcm_s16le
        print("Saved clean spectrogram visualization to spec_clean.png")
        clean_spect = utils.make_spectrogram_array(args.clean_wav)
        utils.save_spectrogram(clean_spect, 'clean')

        print("Saved noisy spectrogram visualization to spec_noisy.png")
        noisy_spect = utils.make_spectrogram_array(args.noisy_wav)
        utils.save_spectrogram(noisy_spect, 'noisy')

        print("Saved enhanced spectrogram visualization to spec_enhanced.png")
        enhanced_spect = utils.make_spectrogram_array('./out.wav')
        utils.save_spectrogram(enhanced_spect, 'enhanced')

        # test accuracy
        print('test loss : {:.4f} PESQ : {:.4f} STOI : {:.4f}'.format(
            test_loss, test_PESQ, test_STOI))
def forward(self, input1, input2):
    # legacy-style autograd Function: stash the inputs for the backward pass
    self.save_for_backward(input1, input2)
    return torch.atan2(input1, input2)
def angle(complex_tensor):
    return torch.atan2(complex_tensor[..., 1], complex_tensor[..., 0])
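# Usage sketch (added for illustration): `angle` mirrors torch.angle for
# tensors stored in the (real, imag) last-dimension layout, e.g. the output
# of torch.view_as_real.
import torch

z = torch.randn(5, dtype=torch.complex64)
assert torch.allclose(angle(torch.view_as_real(z)), torch.angle(z))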
def forward(self, input, return_rot_matrix=True):
    xy = self.features(self.input_norm(input)).view(-1, 2)
    angle = torch.atan2(xy[:, 0] + 1e-8, xy[:, 1] + 1e-8)
    if return_rot_matrix:
        return get_rotation_matrix(angle)
    return angle