def __call__(self, x):
    """Run the PSPNet forward pass.

    Args:
        x: Input array or Variable.

    Returns:
        While ``chainer.settings.train`` is truthy, a tuple ``(aux, h)``
        with the auxiliary-branch output and the main-branch output.
        Otherwise only the main-branch output ``h`` (a single Variable).
        Every returned value is resized to the input's spatial size
        ``x.shape[2:]``.

    """
    training = chainer.settings.train
    out_size = x.shape[2:]

    if training:
        # The trunk also yields the feature map for the auxiliary head.
        aux, h = self.trunk(x)
        aux = self.out_aux(F.dropout(self.cbr_aux(aux), ratio=0.1))
        aux = F.resize_images(aux, out_size)
    else:
        h = self.trunk(x)

    # Main branch: pyramid pooling -> conv block with dropout -> classifier.
    h = self.ppm(h)
    h = self.out_main(F.dropout(self.cbr_main(h), ratio=0.1))
    h = F.resize_images(h, out_size)

    if training:
        return aux, h
    return h
def __call__(self, x, img_size):
    """Compute the two score maps from an already-extracted feature map.

    The extractor stage (conv1 -> bn1 -> res2 -> res3 -> res4) is not run
    here; ``x`` is passed straight to ``self.res5``.

    Args:
        x: Feature map fed to ``self.res5``; its spatial size must equal
            ``img_size / 16`` (checked by assertion after ``res5``).
        img_size: ``(height, width)`` of the output; both entries must be
            multiples of 16.

    Returns:
        Tuple ``(score, score_oc)``, each resized to ``img_size``.

    """
    assert img_size[0] % 16 == 0
    assert img_size[1] % 16 == 0

    # res5 keeps the 1/16 resolution.
    feat = self.res5(x)
    assert feat.shape[2] == (img_size[0] / 16)
    assert feat.shape[3] == (img_size[1] / 16)

    conv6 = self.conv6(feat)  # 1/16

    # Both heads read the same conv6 features and are upsampled 1/16 -> 1/1.
    score = F.resize_images(self.score_fr(conv6), img_size)
    score_oc = F.resize_images(self.score_oc(conv6), img_size)
    return score, score_oc
def __call__(self, orig_img):
    """Detect poses in one image.

    Args:
        orig_img: Image array indexed as ``(height, width, channel)``.
            A copy is taken before any processing.

    Returns:
        Tuple ``(poses, scores)``. When ``self.precise`` is set, whatever
        ``self.detect_precise`` returns. When no peak is found, a pair of
        empty arrays.

    """
    image = orig_img.copy()
    if self.precise:
        return self.detect_precise(image)

    img_h, img_w, _ = image.shape
    input_w, input_h = self.compute_optimal_size(image, params['inference_img_size'])
    map_w, map_h = self.compute_optimal_size(image, params['heatmap_size'])

    x_data = self.preprocess(cv2.resize(image, (input_w, input_h)))
    on_gpu = self.device >= 0
    if on_gpu:
        x_data = cuda.to_gpu(x_data)

    # Only the last stage's outputs are used.
    h1s, h2s = self.model(x_data)
    map_size = (map_h, map_w)
    pafs = F.resize_images(h1s[-1], map_size).data[0]
    heatmaps = F.resize_images(h2s[-1], map_size).data[0]

    if on_gpu:
        # Only the PAFs are copied to the host here.
        pafs = pafs.get()
        cuda.get_device_from_id(self.device).synchronize()

    all_peaks = self.compute_peaks_from_heatmaps(heatmaps)
    if len(all_peaks) == 0:
        return np.empty((0, len(JointType), 3)), np.empty(0)

    all_connections = self.compute_connections(pafs, all_peaks, map_w, params)
    subsets = self.grouping_key_points(all_connections, all_peaks, params)

    # Rescale peak coordinates from heatmap size to original image size.
    all_peaks[:, 1] *= img_w / map_w
    all_peaks[:, 2] *= img_h / map_h

    poses = self.subsets_to_pose_array(subsets, all_peaks)
    scores = subsets[:, -2]
    return poses, scores
def extract(self, images):
    """Run inference at every scale and return the weighted sum of outputs.

    Args:
        images: Batch of images; converted with ``self.xp.asarray`` and
            passed through ``self.models[0].prepare``.

    Returns:
        Tuple ``(h_seg, h_hor, h_ver)``: for each of the three outputs,
        the sum over scales of the output array multiplied by
        ``self.weights[i] / sum(self.weights)``, moved to the host with
        ``cuda.to_cpu``.

    """
    prepared = self.xp.asarray(images)
    prepared = self.models[0].prepare(prepared, 1)

    # Loop invariants, computed once.
    downscale = self.models[0].downscale
    H, W = prepared.shape[2:]
    weight_total = sum(self.weights)
    # Outputs of non-unit scales are brought back to this fixed size.
    out_size = (512 // downscale, 512 // downscale)

    h_segs, h_hors, h_vers = [], [], []
    for i, scale in enumerate(self.scales):
        hh = int((H // downscale) * scale)
        ww = int((W // downscale) * scale)
        resized = F.resize_images(prepared, (hh, ww))

        # NOTE(review): original indentation was lost; the resizes below
        # are kept inside the inference context, which is harmless if they
        # originally sat outside it - confirm.
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            h_seg, h_hor, h_ver = self(resized)
            if scale != 1:
                h_seg = F.resize_images(h_seg, out_size)
                h_hor = F.resize_images(h_hor, out_size)
                h_ver = F.resize_images(h_ver, out_size)

        weight = self.weights[i] / weight_total
        h_segs.append(h_seg.array * weight)
        h_hors.append(h_hor.array * weight)
        h_vers.append(h_ver.array * weight)

    h_seg = cuda.to_cpu(sum(h_segs))
    h_hor = cuda.to_cpu(sum(h_hors))
    h_ver = cuda.to_cpu(sum(h_vers))
    return h_seg, h_hor, h_ver
def shared_middle(self, batch_size, width_rgb, width_flow, rpn_scores_rgb, rpn_locs_rgb,
                  rpn_scores_flow, rpn_locs_flow, anchor_rgb, gt_segments_rgb, labels, seg_info):
    """Merge RGB and flow RPN outputs, then sample training ROIs.

    The flow outputs are resampled along the width axis to ``width_rgb``
    and averaged with the RGB outputs. The averaged scores and locations
    go through ``nms_process`` and ``proposal_target_creator`` of
    ``self.time_seg_train_chain_rgb``.

    Per the original comment, ``rpn_scores_rgb`` is ``(N, W_rgb * A, 2)``
    and ``rpn_scores_flow`` is ``(N, W_flow * A, 2)``.

    Returns:
        Tuple ``(sample_roi, sample_roi_index, gt_roi_loc, gt_roi_label)``.

    """
    n_anchor = anchor_rgb.shape[1]

    def align_flow_to_rgb(flow_out):
        # (B, W_flow * A, 2) -> (B, 2, W_flow, A)
        grid = F.transpose(flow_out.reshape(batch_size, width_flow, n_anchor, 2),
                           axes=(0, 3, 1, 2))
        # (B, 2, W_flow, A) -> (B, 2, W_rgb, A)
        grid = F.resize_images(grid, (width_rgb, n_anchor))
        # (B, W_rgb, A, 2) -> (B, W_rgb * A, 2)
        return F.reshape(F.transpose(grid, axes=(0, 2, 3, 1)),
                         shape=(batch_size, width_rgb * n_anchor, 2))

    rpn_locs = F.average(F.stack([rpn_locs_rgb, align_flow_to_rgb(rpn_locs_flow)]), axis=0)
    rpn_scores = F.average(F.stack([rpn_scores_rgb, align_flow_to_rgb(rpn_scores_flow)]), axis=0)

    # Merging is done; the RGB chain handles NMS and target creation.
    chain = self.time_seg_train_chain_rgb
    rois, roi_indices = chain.nms_process(batch_size, width_rgb, n_anchor,
                                          rpn_scores, rpn_locs, anchor_rgb)
    sample_roi, sample_roi_index, gt_roi_loc, gt_roi_label = chain.proposal_target_creator(
        rois, roi_indices, gt_segments_rgb, labels, seg_info,
        chain.loc_normalize_mean, chain.loc_normalize_std)
    return sample_roi, sample_roi_index, gt_roi_loc, gt_roi_label
def compute_loss(imgs, pafs_ys, heatmaps_ys, pafs_t, heatmaps_t, ignore_mask):
    """Sum the PAF and heatmap mean-squared errors over all stages.

    At positions selected by ``ignore_mask`` the target is overwritten
    with the prediction's own data before the error is computed.

    Args:
        imgs: Unused.
        pafs_ys: Per-stage PAF predictions.
        heatmaps_ys: Per-stage heatmap predictions.
        pafs_t: Target PAFs; channel count is read from ``shape[1]``.
        heatmaps_t: Target heatmaps; channel count is read from ``shape[1]``.
        ignore_mask: Mask repeated along a new channel axis for each target.

    Returns:
        Tuple ``(total_loss, paf_loss_log, heatmap_loss_log)``: the summed
        loss and the per-stage losses as Python floats.

    """
    paf_loss_log = []
    heatmap_loss_log = []
    total_loss = 0

    paf_masks = ignore_mask[:, None].repeat(pafs_t.shape[1], axis=1)
    heatmap_masks = ignore_mask[:, None].repeat(heatmaps_t.shape[1], axis=1)

    # compute loss on each stage
    for pafs_y, heatmaps_y in zip(pafs_ys, heatmaps_ys):
        target_pafs = pafs_t.copy()
        target_heatmaps = heatmaps_t.copy()
        stage_paf_masks = paf_masks.copy()
        stage_heatmap_masks = heatmap_masks.copy()

        if pafs_y.shape != target_pafs.shape:
            # Targets and masks follow the PAF prediction's spatial size.
            size = pafs_y.shape[2:]
            target_pafs = F.resize_images(target_pafs, size).data
            target_heatmaps = F.resize_images(target_heatmaps, size).data
            stage_paf_masks = F.resize_images(stage_paf_masks.astype('f'), size).data > 0
            stage_heatmap_masks = F.resize_images(stage_heatmap_masks.astype('f'), size).data > 0

        # Elementwise comparison is kept so non-bool masks select as before.
        paf_ignored = stage_paf_masks == True  # noqa: E712
        heatmap_ignored = stage_heatmap_masks == True  # noqa: E712
        target_pafs[paf_ignored] = pafs_y.data[paf_ignored]
        target_heatmaps[heatmap_ignored] = heatmaps_y.data[heatmap_ignored]

        pafs_loss = F.mean_squared_error(pafs_y, target_pafs)
        heatmaps_loss = F.mean_squared_error(heatmaps_y, target_heatmaps)
        total_loss += pafs_loss + heatmaps_loss

        paf_loss_log.append(float(cuda.to_cpu(pafs_loss.data)))
        heatmap_loss_log.append(float(cuda.to_cpu(heatmaps_loss.data)))

    return total_loss, paf_loss_log, heatmap_loss_log
def predict(self, x, no_of_predictions=1, seq_len=4):
    """Predict future frames, feeding each prediction back as input.

    For every prediction, ``self.singleforward`` is called for levels
    1..4 on ``x`` resized to 1/8, 1/4, 1/2 and full resolution, each
    level receiving the previous level's output.

    :param x: input array; the original comment gives its shape as
        ``[n, 12, h, w]``
    :param no_of_predictions: number of frames to predict
    :param seq_len: after each prediction the last ``seq_len * 3``
        channels of the concatenated input are kept
    :return: list holding ``output.data`` of each predicted frame
    """
    xp = cp.get_array_module(x)
    n, c, h, w = x.shape
    outputs = []
    for i in range(no_of_predictions):
        print("Predicting frame no : ", i + 1)
        coarse_size = (int(h / 2**3), int(w / 2**3))
        seq = resize_images(x, coarse_size)
        print(coarse_size)

        output = None
        for j in range(1, 5):
            output = self.singleforward(j, seq, output)
            if j != 4:
                # Input for the next, finer level.
                divisor = 2**(3 - j)
                seq = resize_images(x, (int(h / divisor), int(w / divisor)))

        outputs.append(output.data)
        # Append the new frame and keep only the newest channels.
        x = xp.concatenate([x, output.data], 1)[:, -seq_len * 3:, :, :]
        print("Predictions done for : ", i + 1)
    return outputs
def __call__(self, x):
    """Compute the two score maps for an input image batch.

    Args:
        x: Input whose spatial dimensions (axes 2 and 3) must both be
            multiples of 16.

    Returns:
        Tuple ``(score, score_oc)``, each resized to ``x.shape[2:4]``.

    """
    in_h = x.shape[2]
    assert in_h % 16 == 0
    in_w = x.shape[3]
    assert in_w % 16 == 0

    # conv1 -> bn1 -> res2 -> res3 -> res4, then res5; both at 1/16.
    feat = self.res5(self.extractor(x))
    assert feat.shape[2] == (in_h / 16)
    assert feat.shape[3] == (in_w / 16)

    conv6 = self.conv6(feat)  # 1/16
    out_size = x.shape[2:4]

    # Both heads read conv6 and are upsampled 1/16 -> 1/1.
    score = F.resize_images(self.score_fr(conv6), out_size)
    score_oc = F.resize_images(self.score_oc(conv6), out_size)
    return score, score_oc
def __call__(self, x):
    """Apply ``self.conv`` across one spatial axis of ``x``.

    The chosen spatial axis (height when ``self.is_height``, otherwise
    width) is first resized to ``self.base_sp_size`` if it differs. The
    tensor then has axis 1 swapped with ``self.index``, goes through
    ``self.conv``, and is swapped back. If the resulting size along
    ``self.index`` differs from the input's, the result is resized to the
    input's ``(height, width)``.

    All resizes are bilinear with ``align_corners=True``, for both
    upsampling and downsampling. An average-pooling variant for the
    downsampling case was left commented out in the original.

    """
    height, width = x.shape[2:]
    # The input's spatial size is the restore target in both orientations.
    real_in_size = (height, width)
    if self.is_height:
        real_sp_size = height
        base_in_size = (self.base_sp_size, width)
    else:
        real_sp_size = width
        base_in_size = (height, self.base_sp_size)

    if real_sp_size != self.base_sp_size:
        x = F.resize_images(x, output_shape=base_in_size, mode="bilinear", align_corners=True)

    x = F.swapaxes(x, axis1=1, axis2=self.index)
    x = self.conv(x)
    x = F.swapaxes(x, axis1=1, axis2=self.index)

    if real_sp_size != x.shape[self.index]:
        x = F.resize_images(x, output_shape=real_in_size, mode="bilinear", align_corners=True)
    return x
def __call__(self, img):
    """Detect poses in one image.

    Args:
        img: Image array indexed as ``(height, width, channel)``. A copy
            is taken before any processing.

    Returns:
        The pose array built by ``self.subsets_to_pose_array``. When no
        peak is found, an empty array of shape
        ``(0, len(JointType), 3)``. (The empty case previously returned a
        2-tuple, which did not match the normal single-array return.)

    """
    edit_img = img.copy()
    img_h, img_w, _ = edit_img.shape

    # Optimise the image and heatmap sizes (made a multiple of stride 8).
    input_w, input_h = self.compute_optimal_size(edit_img, constants['img_size'])
    map_w, map_h = self.compute_optimal_size(edit_img, constants['heatmap_size'])

    # Resize the image and convert it into the model's input format.
    resized_image = cv2.resize(edit_img, (input_w, input_h))
    x_data = self.preprocess(resized_image)

    # Move the input to the GPU when one is configured.
    if self.device >= 0:
        x_data = cuda.to_gpu(x_data)

    # The model returns outputs for all stages.
    Ss, Ls = self.model(x_data)

    # Use only the final stage.
    heatmaps = F.resize_images(Ss[-1], (map_h, map_w)).data[0]
    pafs = F.resize_images(Ls[-1], (map_h, map_w)).data[0]

    if self.device >= 0:
        pafs = pafs.get()
        cuda.get_device_from_id(self.device).synchronize()

    # Compute peaks from the heatmaps.
    all_peaks = self.compute_peaks_from_heatmaps(heatmaps)
    if len(all_peaks) == 0:
        # Same return arity as the normal path below.
        return np.empty((0, len(JointType), 3))

    # Compute connections from the peaks and the PAFs.
    all_connections = self.compute_connections(pafs, all_peaks, map_w, constants)

    # Build the subsets.
    subsets = self.grouping_key_points(all_connections, all_peaks, constants)

    # Rescale peak coordinates from heatmap size to original image size.
    all_peaks[:, 1] *= img_w / map_w
    all_peaks[:, 2] *= img_h / map_h

    # Compute the poses.
    poses = self.subsets_to_pose_array(subsets, all_peaks)
    return poses
def __call__(self, x):
    """Encode a frame sequence, then reconstruct it and optionally predict.

    Args:
        x: Array indexed as ``(batch, frame, channel, height, width)``.
            On the first call ``self.in_episodes`` and
            ``self.out_episodes`` are both set to the frame count; later
            calls must supply the same frame count.

    Returns:
        Tuple ``(reconst_imgs, pred_imgs)``. Each is the per-step output
        resized to the input's spatial size and concatenated along a new
        frame axis (axis 1). ``pred_imgs`` is ``None`` unless
        ``self.predict`` is truthy.

    """
    # NOTE(review): the source had lost its indentation. The placement of
    # ``copy_state`` (after the whole encoding loop) and the extent of the
    # device contexts (covering the decode loops) are reconstructed -
    # confirm against the original file.
    xp = self.xp
    batch_size, nframes, nchannels = x.shape[:3]
    in_size = x.shape[3:]

    if self.in_episodes is None:
        self.in_episodes = self.out_episodes = nframes
    else:
        assert self.in_episodes == nframes
    self.reset_state()

    # (B, N, C, H, W) -> (N, B, C, H, W) so frames can be indexed first.
    frames = x.transpose((1, 0, 2, 3, 4))

    # encode
    for t in range(self.in_episodes):
        xi = F.resize_images(frames[t], self.patch_size)
        xi = xi.reshape((batch_size, -1))
        for layer in self.encoder:
            xi = layer(xi)
    self.copy_state()

    frame_shape = (batch_size, self.n_channels) + self.patch_size  # BCHW

    # decode (reconstruct), starting from an all-zero input
    reconst_imgs = []
    with chainer.cuda.get_device_from_id(self._device_id):
        xi = chainer.Variable(xp.zeros_like(xi, dtype=xi.dtype))
        for _ in range(self.in_episodes):
            for layer in self.reconst:
                ri = layer(xi)
                xi = ri
            ri = F.resize_images(ri.reshape(frame_shape), in_size)
            reconst_imgs.append(ri[:, xp.newaxis])  # B, 1, C, H, W
    reconst_imgs = F.concat(reconst_imgs, axis=1)  # BFCHW

    # decode (prediction), again starting from an all-zero input
    pred_imgs = None
    if self.predict:
        pred_imgs = []
        with chainer.cuda.get_device_from_id(self._device_id):
            xi = chainer.Variable(xp.zeros_like(xi, dtype=xi.dtype))
            for _ in range(self.out_episodes):
                for layer in self.pred:
                    pi = layer(xi)
                    xi = pi
                pi = F.resize_images(pi.reshape(frame_shape), in_size)
                pred_imgs.append(pi[:, xp.newaxis])
        pred_imgs = F.concat(pred_imgs, axis=1)  # BFCHW

    return reconst_imgs, pred_imgs
def __call__(self, orig_img):
    """Detect poses while printing an IE-vs-chainer output comparison.

    The same preprocessed input is passed to ``IEresult`` (fixed FP32
    model files, ``"CPU"`` device) and to ``self.model``. Statistics of
    both outputs are printed. Detection itself uses the chainer outputs.

    Args:
        orig_img: Image array indexed as ``(height, width, channel)``.

    Returns:
        Tuple ``(poses, scores)``. When ``self.precise`` is set, whatever
        ``self.detect_precise`` returns. When no peak is found, a pair of
        empty arrays.

    """
    orig_img = orig_img.copy()
    if self.precise:
        return self.detect_precise(orig_img)

    orig_img_h, orig_img_w, _ = orig_img.shape
    input_w, input_h = self.compute_optimal_size(orig_img, params['inference_img_size'])
    map_w, map_h = self.compute_optimal_size(orig_img, params['heatmap_size'])

    x_data = self.preprocess(cv2.resize(orig_img, (input_w, input_h)))
    use_gpu = self.device >= 0
    if use_gpu:
        x_data = cuda.to_gpu(x_data)
    print("x_data.shape", x_data.shape, type(x_data))

    resS = IEresult("models/FP32/pose_iter_440000.xml",
                    "models/FP32/pose_iter_440000.bin", "CPU", x_data)
    print("IEresult done", resS.keys())
    # Pick the IE outputs by channel count: 38 -> H1S, 19 -> H2S.
    for k in resS.keys():
        blob = resS[k]
        if blob.shape[1] == 38:
            H1S = blob
        if blob.shape[1] == 19:
            H2S = blob

    print(" stddiv/mean/max/min")
    h1s, h2s = self.model(x_data)
    last_h1, last_h2 = h1s[-1], h2s[-1]
    print("IEbase: H1S %11.7f %11.7f %11.7f %11.7f" % self.statistics(H1S))
    print("chainer:h1s %11.7f %11.7f %11.7f %11.7f" % self.statistics(last_h1.data[0]))
    print("IEbase: H2S %11.7f %11.7f %11.7f %11.7f" % self.statistics(H2S))
    print("chainer:h2s %11.7f %11.7f %11.7f %11.7f" % self.statistics(last_h2.data[0]))
    print("len(h1s)", len(h1s), type(h1s))
    print("len(h2s)", len(h2s), type(h2s))
    print("h1s[-1].shape", last_h1.shape, type(last_h1))
    print("h2s[-1].shape", last_h2.shape, type(last_h2))

    pafs = F.resize_images(last_h1, (map_h, map_w)).data[0]
    heatmaps = F.resize_images(last_h2, (map_h, map_w)).data[0]
    print("pafs.shape", pafs.shape, type(pafs.shape))
    print("heatmaps.shape", heatmaps.shape, type(heatmaps.shape))

    if use_gpu:
        pafs = pafs.get()
        cuda.get_device_from_id(self.device).synchronize()

    all_peaks = self.compute_peaks_from_heatmaps(heatmaps)
    if len(all_peaks) == 0:
        return np.empty((0, len(JointType), 3)), np.empty(0)

    all_connections = self.compute_connections(pafs, all_peaks, map_w, params)
    subsets = self.grouping_key_points(all_connections, all_peaks, params)

    # Rescale peak coordinates from heatmap size to original image size.
    all_peaks[:, 1] *= orig_img_w / map_w
    all_peaks[:, 2] *= orig_img_h / map_h

    poses = self.subsets_to_pose_array(subsets, all_peaks)
    scores = subsets[:, -2]
    return poses, scores
def update_core(self):
    """Run one multi-scale adversarial update of generator and discriminator.

    The batch is split along the channel axis into the input sequence
    (all but the last 3 channels) and the ground truth (last 3 channels).
    For levels 1..4 the generator and discriminator are run on inputs
    downscaled by ``2 ** (4 - level)`` (full size at level 4) and the
    adversarial losses are accumulated. L2 and gradient losses are taken
    on the final output against the full-size ground truth. The generator
    is updated first, then the discriminator.
    """
    # Convert the incoming batch into a Variable on the configured device.
    batch = self.get_iterator('main').next()
    data = Variable(self.converter(batch, self.device))
    n, c, h, w = data.shape

    # Split into the sequential input fed to the network and the ground truth.
    seq, gt = split_axis(data, [c - 3], 1)
    # Drop the reference to the combined batch.
    del data

    output = None
    total_loss_dis_adv = 0
    total_loss_gen_adv = 0
    for level in range(1, 5):
        # Downscale ground truth and input for every level but the last.
        if level != 4:
            factor = 2 ** (4 - level)
            size = (int(h / factor), int(w / factor))
            downscaled_gt = resize_images(gt, size)
            downscaled_seq = resize_images(seq, size)
        else:
            downscaled_gt = gt
            downscaled_seq = seq

        output = self.GenNetwork.singleforward(level, downscaled_seq, output)
        dis_fake = self.DisNetwork.singleforward(level, output)
        dis_real = self.DisNetwork.singleforward(level, downscaled_gt)

        loss_dis = (loss_target1(dis_real) + loss_target0(dis_fake)) / 2
        loss_gen = loss_target1(dis_fake)
        total_loss_dis_adv += loss_dis
        total_loss_gen_adv += loss_gen

    loss_l2 = l2_loss(output, gt)
    loss_gdl = gradient_loss(output, gt)
    composite_gen_loss = (self.LAM_LP * loss_l2
                          + self.LAM_GDL * loss_gdl
                          + self.LAM_ADV * total_loss_gen_adv)

    reports = (
        ('L2Loss', loss_l2, self.GenNetwork),
        ('GDL', loss_gdl, self.GenNetwork),
        ('AdvLoss', total_loss_gen_adv, self.GenNetwork),
        ('DisLoss', total_loss_dis_adv, self.DisNetwork),
        ('CompositeGenLoss', composite_gen_loss, self.GenNetwork),
    )
    for key, value, observer in reports:
        report({key: value}, observer)

    # TODO: Come up with a more elegant way
    # Generator step: both networks' grads are cleared before backward.
    self.DisNetwork.cleargrads()
    self.GenNetwork.cleargrads()
    composite_gen_loss.backward()
    self._optimizers["GeneratorNetwork"].update()

    # Discriminator step.
    self.DisNetwork.cleargrads()
    self.GenNetwork.cleargrads()
    total_loss_dis_adv.backward()
    self._optimizers["DiscriminatorNetwork"].update()
def __call__(self, orig_img, fast_mode=False):
    """Detect person poses, averaging model outputs over inference scales.

    Args:
        orig_img: Image array indexed as ``(height, width, channel)``.
        fast_mode: When true, only the first entry of
            ``params['inference_scales']`` is used.

    Returns:
        Person pose array from ``self.subsets_to_person_pose_array``, or
        an empty array of shape ``(0, len(JointType), 3)`` when no peak
        is found.

    """
    orig_img_h, orig_img_w, _ = orig_img.shape
    out_w, out_h = self.compute_optimal_size(orig_img, params['heatmap_size'])

    # use only the first scale on fast mode
    if fast_mode:
        scales = [params['inference_scales'][0]]
    else:
        scales = params['inference_scales']

    paf_maps = []
    heat_maps = []
    for scale in scales:
        print("Inference scale: %.1f..." % (scale))
        img_size = int(params['inference_img_size'] * scale)
        in_w, in_h = self.compute_optimal_size(orig_img, img_size)

        resized_image = cv2.resize(orig_img, (in_w, in_h))
        # HWC -> NCHW, scaled to [-0.5, 0.5).
        x_data = np.array(resized_image[np.newaxis],
                          dtype=np.float32).transpose(0, 3, 1, 2) / 256 - 0.5
        if self.device >= 0:
            x_data = cuda.to_gpu(x_data)

        h1s, h2s = self.model(x_data)
        paf_maps.append(F.resize_images(h1s[-1], (out_h, out_w)).data[0])
        heat_maps.append(F.resize_images(h2s[-1], (out_h, out_w)).data[0])

    # sum() starts from 0, matching the original accumulators.
    pafs = sum(paf_maps) / len(scales)
    heatmaps = sum(heat_maps) / len(scales)

    if self.device >= 0:
        pafs = cuda.to_cpu(pafs)

    all_peaks = self.compute_peaks_from_heatmaps(heatmaps)
    all_peaks_flatten = np.array(
        [peak for peaks_each_category in all_peaks for peak in peaks_each_category])
    if len(all_peaks_flatten) == 0:
        return np.empty((0, len(JointType), 3))

    all_connections = self.compute_connections(
        pafs, all_peaks, all_peaks_flatten, out_w, params)
    subsets = self.grouping_key_points(all_connections, all_peaks_flatten, params)

    # Rescale peak coordinates from heatmap size to original image size.
    all_peaks_flatten[:, 0] *= orig_img_w / out_w
    all_peaks_flatten[:, 1] *= orig_img_h / out_h

    return self.subsets_to_person_pose_array(subsets, all_peaks_flatten)
def viz_input(args, config):
    """Visualize input for network.

    For every test batch, one figure is saved to
    ``images/vis/batch_<index>.png``. Each subplot shows, for one entry
    of the batch's input list, the occupied cells of the input together
    with the ground-truth probability and regression maps as coloured
    overlays.

    Args:
        args: Unused.
        config: Configuration mapping; the keys ``model``, ``gpus``,
            ``updater``, ``dataset`` and ``iterator`` are read.
    """
    subprocess.call(['sh', "setup.sh"])
    # Return values are not needed; the calls are kept in case they have
    # side effects.
    get_model(config["model"])
    parse_devices(config['gpus'], config['updater']['name'])
    test_data = load_dataset_test(config["dataset"])
    test_iter = create_iterator_test(test_data, config['iterator'])
    dataset_config = config['dataset']['test']['args']

    for i_b, batch_items in enumerate(test_iter):
        # gt_prob, gt_reg are ground truth
        x_list, counter, indexes_list, gt_prob, gt_reg, batch, n_no_empty = batch_items[0]
        gt_prob = F.resize_images(
            gt_prob.astype("f")[np.newaxis, np.newaxis], (400, 352))[0, 0].data
        # Only channel 7 of the regression target is visualised.
        gt_reg = F.resize_images(
            gt_reg[7, :, :].astype("f")[np.newaxis, np.newaxis], (400, 352))[0, 0].data
        prob_mask = gt_prob != 0
        reg_mask = gt_reg != 0

        fig, axes = plt.subplots(2, 3, figsize=(20, 7))
        thres_list = dataset_config['thres_t']
        for index, (x, indexes) in enumerate(zip(x_list, indexes_list)):
            x = x[:, :, 0]
            x = feature_to_voxel(x, indexes, 3, 10, 400, 352, batch)
            input_x = chainer.cuda.to_cpu(x.data.astype("f")[0])
            input_x = input_x.max(axis=(0, 1))

            # (mask, (R, G, B)) applied in order; later overlays win.
            overlays = (
                (input_x != 0, (0.3, .8, 0.2)),  # lidar data
                (prob_mask, (1, 0, 0)),          # probability of each box
                (reg_mask, (0, 0, 1)),           # regression for ground truth
            )
            image = np.ones((400, 352, 3))
            for channel in range(3):
                plane = np.ones(input_x.shape, dtype='f') * 0.95
                for mask, rgb in overlays:
                    plane[mask] = rgb[channel]
                image[:, :, channel] = plane

            row, col = divmod(index, 3)
            axes[row, col].imshow(image[::-1][100:300], cmap="hot")
            axes[row, col].axis('off')
            axes[row, col].set_title("Thres: {}".format(thres_list[index]))

        plt.tight_layout()
        plt.savefig("images/vis/" + "batch_" + str(i_b) + ".png")
        plt.close()
def __call__(self, x):
    """Run backbone and head, resizing the outputs to the input size.

    The target size is ``self.in_size`` when ``self.fixed_size`` is set,
    otherwise the spatial size of ``x``.

    Returns:
        ``(x, y, z)`` - all three head outputs resized - when ``self.aux``
        is truthy; otherwise only the resized first output.

    """
    if self.fixed_size:
        out_size = self.in_size
    else:
        out_size = x.shape[2:]

    feat, _ = self.backbone(x)
    main, aux_a, aux_b = self.head(feat)
    main = F.resize_images(main, output_shape=out_size)

    if not self.aux:
        return main

    aux_a = F.resize_images(aux_a, output_shape=out_size)
    aux_b = F.resize_images(aux_b, output_shape=out_size)
    return main, aux_a, aux_b
def __call__(self, top, middle, bottom):
    """Refine three feature maps and concatenate them at ``top``'s size.

    ``bottom`` passes through three refine layers and ``middle`` through
    two; both results are resized to the spatial size of ``top``. ``top``
    passes through one refine layer. The three results are concatenated
    along axis 1 in the order (bottom, middle, top).
    """
    top_size = (top.shape[2], top.shape[3])

    from_bottom = self.refine_1_3(self.refine_1_2(self.refine_1_1(bottom)))
    refine_1_upsample = F.resize_images(from_bottom, top_size)

    from_middle = self.refine_2_2(self.refine_2_1(middle))
    refine_2_upsample = F.resize_images(from_middle, top_size)

    h_top = self.refine_3_1(top)

    return F.concat((refine_1_upsample, refine_2_upsample, h_top), axis=1)
def compute_loss(self, images, pafs_ys, heatmaps_ys, ground_truth_pafs,
                 ground_truth_heatmaps, ignore_mask):
    """Sum the PAF and heatmap mean-squared errors over all stages.

    At positions selected by ``ignore_mask`` the ground truth is
    overwritten with the prediction's own data before the error is
    computed.

    Args:
        images: Unused.
        pafs_ys: Per-stage PAF predictions.
        heatmaps_ys: Per-stage heatmap predictions.
        ground_truth_pafs: Ground-truth PAFs; channel count is read from
            ``shape[1]``.
        ground_truth_heatmaps: Ground-truth heatmaps; channel count is
            read from ``shape[1]``.
        ignore_mask: Mask repeated along a new channel axis for each
            ground truth.

    Returns:
        Tuple ``(loss, paf_losses, heatmap_losses)``: the summed loss and
        the per-stage losses as Python floats.
    """
    paf_losses = []
    heatmap_losses = []
    loss = 0.0

    paf_masks = ignore_mask[:, None].repeat(ground_truth_pafs.shape[1], axis=1)
    heatmap_masks = ignore_mask[:, None].repeat(ground_truth_heatmaps.shape[1], axis=1)

    # compute loss on each stage
    for pafs_y, heatmaps_y in zip(pafs_ys, heatmaps_ys):
        gt_pafs = ground_truth_pafs.copy()
        gt_heatmaps = ground_truth_heatmaps.copy()
        stage_paf_masks = paf_masks.copy()
        stage_heatmap_masks = heatmap_masks.copy()

        if pafs_y.shape != gt_pafs.shape:
            # Ground truth and masks follow the PAF prediction's spatial size.
            size = pafs_y.shape[2:]
            gt_pafs = F.resize_images(gt_pafs, size).data
            gt_heatmaps = F.resize_images(gt_heatmaps, size).data
            stage_paf_masks = F.resize_images(stage_paf_masks.astype('f'), size).data > 0
            stage_heatmap_masks = F.resize_images(stage_heatmap_masks.astype('f'), size).data > 0

        # Elementwise comparison is kept so non-bool masks select as before.
        paf_ignored = stage_paf_masks == True  # noqa: E712
        heatmap_ignored = stage_heatmap_masks == True  # noqa: E712
        gt_pafs[paf_ignored] = pafs_y.data[paf_ignored]
        gt_heatmaps[heatmap_ignored] = heatmaps_y.data[heatmap_ignored]

        pafs_loss = F.mean_squared_error(pafs_y, gt_pafs)
        heatmaps_loss = F.mean_squared_error(heatmaps_y, gt_heatmaps)
        loss += pafs_loss + heatmaps_loss

        paf_losses.append(float(chainer.cuda.to_cpu(pafs_loss.data)))
        heatmap_losses.append(float(chainer.cuda.to_cpu(heatmaps_loss.data)))

    return loss, paf_losses, heatmap_losses
def __call__(self, refine_concat):
    """Compute the vector and heat outputs from the refined features.

    Both branches apply two convolutions, optionally double the spatial
    size (when ``self.upsample``), then apply two more convolutions. The
    heat branch ends with a sigmoid.

    Returns:
        Tuple ``(vect_out, heat_out)``.
    """
    def trunk(conv_a, conv_b, conv_c):
        # First three layers of a branch, with the optional 2x upsampling
        # between the second and third.
        h = conv_b(conv_a(refine_concat))
        if self.upsample:
            h = F.resize_images(h, (2 * h.shape[2], 2 * h.shape[3]))
        return conv_c(h)

    vect_out = self.vect_conv4(
        trunk(self.vect_conv1, self.vect_conv2, self.vect_conv3))
    heat_out = F.sigmoid(self.heat_conv4(
        trunk(self.heat_conv1, self.heat_conv2, self.heat_conv3)))
    return vect_out, heat_out
def get_example(self, i):
    """Load and resize the ``i``-th example.

    Progress is printed for every 100th index except 0.

    Args:
        i: Index into ``self.imgs_file_list`` and ``self.gt_files``.

    Returns:
        Tuple ``(tgt_img, src_imgs, [], gt_pose)``: the target image and
        the stacked source images, both resized to
        ``(self.height, self.width)``, an empty list, and the pose data
        read from ``self.gt_files[i]``.
    """
    if i % 100 == 0 and i != 0:
        percentage = i * 100 / len(self.imgs_file_list)
        print("Progress: {0:d}%".format(int(percentage)))

    imgs_path = self.imgs_file_list[i]
    tgt_img_path = imgs_path[0]
    src_imgs_path = imgs_path[1]

    tgt_img = load_as_float_norm(tgt_img_path)
    src_imgs = [load_as_float_norm(path) for path in src_imgs_path]
    gt_pose = read_file_list(self.gt_files[i])

    out_size = (self.height, self.width)
    # The target gets a leading batch axis for the resize, dropped afterwards.
    tgt_img = F.resize_images(tgt_img[None], out_size).data[0]
    src_imgs = F.resize_images(np.array(src_imgs, dtype='f'), out_size).data
    return tgt_img, src_imgs, [], gt_pose
def __call__(self, x):
    """Compute the main output and the two tanh-activated side outputs.

    All three heads read the same trunk features, apply a conv block with
    dropout (ratio 0.1) and an output layer, and are resized to the
    input's spatial size. The ``cp`` and ``ocp`` heads additionally apply
    ``tanh`` before resizing.

    Returns:
        Tuple ``(h, h_cp, h_ocp)``.
    """
    out_size = x.shape[2:]
    feat = self.trunk(x)

    h_cp = F.tanh(self.out_cp(F.dropout(self.cbr_cp(feat), ratio=0.1)))
    h_cp = F.resize_images(h_cp, out_size)

    h_ocp = F.tanh(self.out_ocp(F.dropout(self.cbr_ocp(feat), ratio=0.1)))
    h_ocp = F.resize_images(h_ocp, out_size)

    h = self.out_main(F.dropout(self.cbr_main(feat), ratio=0.1))
    h = F.resize_images(h, out_size)

    return h, h_cp, h_ocp
def __call__(self, orig_img):
    """Detect poses using the outputs of ``IEresult``.

    The preprocessed image is passed to ``IEresult`` with
    ``self.IE_xml``, ``self.IE_bin`` and ``self.device``. The outputs
    with 38 and 19 channels are taken as PAFs and heatmaps respectively.

    Args:
        orig_img: Image array indexed as ``(height, width, channel)``.

    Returns:
        Tuple ``(poses, scores)``. When ``self.precise`` is set, whatever
        ``self.detect_precise`` returns. When no peak is found, a pair of
        empty arrays.
    """
    orig_img = orig_img.copy()
    if self.precise:
        return self.detect_precise(orig_img)

    orig_img_h, orig_img_w, _ = orig_img.shape
    input_w, input_h = self.compute_optimal_size(orig_img, params['inference_img_size'])
    map_w, map_h = self.compute_optimal_size(orig_img, params['heatmap_size'])

    resized_image = cv2.resize(orig_img, (input_w, input_h))
    # The input is not moved to a GPU in this variant.
    x_data = self.preprocess(resized_image)
    print("x_data.shape", x_data.shape, type(x_data))

    resS = IEresult(self.IE_xml, self.IE_bin, self.device, x_data)
    print("IEresult done", resS.keys())
    # Pick the outputs by channel count: 38 -> H1S, 19 -> H2S.
    for k in resS.keys():
        if resS[k].shape[1] == 38:
            H1S = resS[k]
        if resS[k].shape[1] == 19:
            H2S = resS[k]

    # Wrapped as single-element stage lists, matching the shape of the
    # chainer model's outputs used elsewhere.
    h1s = [Variable(H1S)]
    h2s = [Variable(H2S)]

    pafs = F.resize_images(h1s[-1], (map_h, map_w)).data[0]
    heatmaps = F.resize_images(h2s[-1], (map_h, map_w)).data[0]
    print("pafs.shape", pafs.shape, type(pafs.shape))
    print("heatmaps.shape", heatmaps.shape, type(heatmaps.shape))

    all_peaks = self.compute_peaks_from_heatmaps(heatmaps)
    if len(all_peaks) == 0:
        return np.empty((0, len(JointType), 3)), np.empty(0)

    all_connections = self.compute_connections(pafs, all_peaks, map_w, params)
    subsets = self.grouping_key_points(all_connections, all_peaks, params)

    # Rescale peak coordinates from heatmap size to original image size.
    all_peaks[:, 1] *= orig_img_w / map_w
    all_peaks[:, 2] *= orig_img_h / map_h

    poses = self.subsets_to_pose_array(subsets, all_peaks)
    scores = subsets[:, -2]
    return poses, scores
def encode(self, image, obj, desc, num):
    """Encode images with an attention map conditioned on ``obj``/``desc``.

    The GPU is chosen as ``GPU.gpus_to_use[num % GPU.num_gpus]`` and the
    model pair as index ``num % 2``. The attention output of the first
    model is reshaped to ``(-1, 1, att_size, att_size)``, resized to
    ``(image_size, image_size)``, and multiplied with the input images
    before they are passed to the second model.

    Args:
        image: Batch of images; ``image.shape[0]`` is the batch size.
        obj: Object vector, repeated for every image in the batch.
        desc: Description vector, repeated for every image in the batch.
        num: Integer used to select the GPU and the model pair.

    Returns:
        Tuple ``(cir_z, cond)`` where ``cir_z`` is the first output of
        the attention-encoder model and ``cond`` is the squeezed
        concatenation of the first rows of the object and description
        inputs.
    """
    gpu_id = GPU.gpus_to_use[num % GPU.num_gpus]
    cuda.get_device(gpu_id).use()

    n_images = image.shape[0]
    obj = np.repeat(np.asarray(obj, dtype=np.float32)[np.newaxis], n_images, axis=0)
    desc = np.repeat(np.asarray(desc, dtype=np.float32)[np.newaxis], n_images, axis=0)

    o_in = cuda.to_gpu(obj, gpu_id)
    d_in = cuda.to_gpu(desc, gpu_id)
    x_in = cuda.to_gpu(image, gpu_id)

    model_idx = num % 2
    att, _, _ = self.enc_models[model_idx](
        Variable(x_in), Variable(o_in), Variable(d_in), train=False)
    att = F.reshape(att, (-1, 1, self.att_size, self.att_size))
    att = F.resize_images(att, (self.image_size, self.image_size))

    cir_z, _, _, _ = self.att_enc_models[model_idx](Variable(x_in) * att, train=False)
    return cir_z, F.squeeze(F.concat((o_in[0], d_in[0]), axis=-1))
def inception_forward(model, ims, batch_size):
    """Return the inception model's predictions for all images.

    Images are processed in batches of ``batch_size`` so they fit in
    memory, and resized to 299x299 first when their spatial size differs.

    Args:
        model: Callable model exposing ``xp`` (its array module).
        ims: 4-dimensional image array with the batch on axis 0.
        batch_size: Number of images per forward pass.

    Returns:
        Array of shape ``(n, 1000)``: columns 1..1000 of the model's
        1008-wide output (the remaining columns are dummies).
    """
    n, c, w, h = ims.shape
    n_batches = int(math.ceil(float(n) / float(batch_size)))
    xp = model.xp

    # Container for the softmax predictions of every image.
    ys = xp.empty((n, 1008), dtype=xp.float32)
    needs_resize = (w, h) != (299, 299)

    for batch_idx in range(n_batches):
        start = batch_idx * batch_size
        stop = min(start + batch_size, n)

        # To GPU if using CuPy.
        ims_batch = Variable(xp.asarray(ims[start:stop]))

        # Resize to the shape expected by the inception module.
        if needs_resize:
            ims_batch = F.resize_images(ims_batch, (299, 299))  # bilinear

        # Inference mode, no graph construction.
        with chainer.using_config('train', False), \
                chainer.using_config('enable_backprop', False):
            y = model(ims_batch)
        ys[start:stop] = y.data

    # 0 and 1001-1008 are the dummies.
    return ys[:, 1:1001]
def rasterize_silhouettes(
        faces,
        image_size=DEFAULT_IMAGE_SIZE,
        anti_aliasing=DEFAULT_ANTI_ALIASING,
        near=DEFAULT_NEAR,
        far=DEFAULT_FAR,
        eps=DEFAULT_EPS,
        background_color=DEFAULT_BACKGROUND_COLOR,
):
    """Rasterize ``faces`` into silhouette (alpha) images.

    With ``anti_aliasing`` the faces are rendered at twice ``image_size``
    and the result is resized back down to ``image_size``. The images are
    flipped along axis 1 before being returned.

    Returns:
        The second element of the rasterizer's output (alpha is the only
        output requested), flipped and, if anti-aliased, down-sampled.
    """
    render_size = image_size
    if anti_aliasing:
        # 2x super-sampling
        faces = faces * (2 * image_size - 1) / (2 * image_size - 2)
        render_size = image_size * 2

    rasterizer = neural_renderer.Rasterize(
        render_size, near, far, eps, background_color,
        return_rgb=False, return_alpha=True, return_depth=False)
    images = rasterizer(faces)[1]

    # transpose & vertical flip
    images = images[:, ::-1, :]

    if anti_aliasing:
        # 0.5x down-sampling; a channel axis is added for the resize and
        # dropped afterwards.
        images = cf.resize_images(images[:, None, :, :], (image_size, image_size))[:, 0]

    return images
def forward(self, inputs, device):
    """Resize the single input to the spatial part of ``self.output_shape``.

    Args:
        inputs: One-element sequence holding the input.
        device: Unused.

    Returns:
        One-element tuple with the resized output.
    """
    (x,) = inputs
    target_size = self.output_shape[2:]
    return (functions.resize_images(x, target_size, align_corners=self.align_corners),)
def _hand_estimate_chainer_backend_each(self, hand_bgr, cx, cy, left_hand):
    """Estimate hand keypoints from a cropped hand image.

    Left hands are mirrored before inference and the resulting heatmaps
    are mirrored back. Every heatmap except the last is smoothed with a
    Gaussian and its maximum is taken as the keypoint.

    Args:
        hand_bgr: Cropped hand image indexed as ``(height, width, channel)``.
        cx: x coordinate the crop is centred on.
        cy: y coordinate the crop is centred on.
        left_hand: Whether the crop shows a left hand.

    Returns:
        List of dicts with keys ``'x'``, ``'y'``, ``'score'`` and
        ``'limb'``, one per smoothed heatmap.
    """
    xp = self.hand_net.xp

    if left_hand:
        # flipCode 1 mirrors left/right (flip around the vertical axis).
        hand_bgr = cv2.flip(hand_bgr, 1)

    resized = cv2.resize(hand_bgr, (368, 368), interpolation=cv2.INTER_CUBIC)
    # HWC -> NCHW, scaled to [-0.5, 0.5).
    x = np.array(resized[np.newaxis], dtype=np.float32)
    x = x.transpose(0, 3, 1, 2)
    x = x / 256 - 0.5

    if self.gpu >= 0:
        x = chainer.cuda.to_gpu(x)
    x = chainer.Variable(x)

    heatmaps = self.hand_net(x)
    # Last stage only, resized back to the crop's size.
    heatmaps = F.resize_images(heatmaps[-1], hand_bgr.shape[:2])[0]
    if self.gpu >= 0:
        heatmaps.to_cpu()
    heatmaps = heatmaps.array

    if left_hand:
        # Undo the mirroring; cv2.flip expects HWC.
        heatmaps = heatmaps.transpose(1, 2, 0)
        heatmaps = cv2.flip(heatmaps, 1)
        heatmaps = heatmaps.transpose(2, 0, 1)

    # Smooth every heatmap except the last one.
    hmaps = []
    if xp == np:
        # cpu
        for i in range(heatmaps.shape[0] - 1):
            hmaps.append(gaussian_filter(heatmaps[i], sigma=self.hand_gaussian_sigma))
    else:
        heatmaps = chainer.cuda.to_gpu(heatmaps)
        heatmaps = F.convolution_2d(
            heatmaps[:, xp.newaxis], self.hand_gaussian_kernel,
            stride=1, pad=int(self.hand_gaussian_ksize / 2))
        heatmaps = chainer.cuda.to_cpu(xp.squeeze(heatmaps.array))
        hmaps.extend(heatmaps[:-1])

    keypoints = []
    idx_offset = 0
    if left_hand:
        idx_offset += len(hmaps)

    for i, heatmap in enumerate(hmaps):
        conf = heatmap.max()
        # First maximum in row-major order. Flattening np.where() output
        # would mix row and column indices when the maximum is tied.
        peak_y, peak_x = np.unravel_index(np.argmax(heatmap), heatmap.shape)
        py = cy + int(peak_y) - hand_bgr.shape[0] / 2
        px = cx + int(peak_x) - hand_bgr.shape[1] / 2
        keypoints.append({
            'x': px,
            'y': py,
            'score': conf,
            'limb': self.index2handname[idx_offset + i]
        })
    return keypoints
def __call__(self, last_fm_middle_top):
    """Compute the vector and heat outputs from the given feature map.

    Both branches apply two convolutions, optionally double the spatial
    size (when ``self.upsample``), then apply two more convolutions. No
    activation is applied to the heat output here.

    Returns:
        Tuple ``(vect_out, heat_out)``.
    """
    def maybe_double(h):
        # 2x upsampling of the spatial axes, only when enabled.
        if self.upsample:
            return F.resize_images(h, (2 * h.shape[2], 2 * h.shape[3]))
        return h

    vect = maybe_double(self.vect_conv2(self.vect_conv1(last_fm_middle_top)))
    vect_out = self.vect_conv4(self.vect_conv3(vect))

    heat = maybe_double(self.heat_conv2(self.heat_conv1(last_fm_middle_top)))
    heat_out = self.heat_conv4(self.heat_conv3(heat))

    return vect_out, heat_out
def __call__(self, hand_img, fast_mode=False, hand_type="right"):
    """Detect hand keypoints in a cropped hand image.

    Left hands are mirrored before inference and the heatmaps are
    mirrored back before peak extraction.

    Args:
        hand_img: Image array indexed as ``(height, width, channel)``.
        fast_mode: Unused.
        hand_type: ``"left"`` enables the mirroring; any other value
            leaves the image as is.

    Returns:
        Whatever ``self.compute_peaks_from_heatmaps`` returns for the
        last-stage heatmaps resized to the input image size.
    """
    is_left = hand_type == "left"
    if is_left:
        hand_img = cv2.flip(hand_img, 1)

    hand_img_h, hand_img_w, _ = hand_img.shape
    side = params["hand_inference_img_size"]
    resized_image = cv2.resize(hand_img, (side, side))
    # HWC -> NCHW, scaled to [-0.5, 0.5).
    x_data = np.array(resized_image[np.newaxis],
                      dtype=np.float32).transpose(0, 3, 1, 2) / 256 - 0.5

    on_gpu = self.device >= 0
    if on_gpu:
        x_data = cuda.to_gpu(x_data)

    hs = self.model(x_data)
    heatmaps = F.resize_images(hs[-1], (hand_img_h, hand_img_w)).data[0]
    if on_gpu:
        heatmaps = heatmaps.get()

    if is_left:
        # cv2.flip expects HWC, so transpose around the flip.
        heatmaps = cv2.flip(heatmaps.transpose(1, 2, 0), 1).transpose(2, 0, 1)

    return self.compute_peaks_from_heatmaps(heatmaps)
def get_mean_cov(model, ims, batch_size=100):
    """Compute mean and covariance of the model's features over all images.

    Images are processed in batches and resized to 299x299 first when
    their spatial size differs. Progress is printed per batch.

    Args:
        model: Callable model exposing ``xp``; called with
            ``get_feature=True``.
        ims: 4-dimensional image array with the batch on axis 0.
        batch_size: Number of images per forward pass.

    Returns:
        Tuple ``(mean, cov)``: the per-feature mean over the 2048-wide
        features (moved to the host) and ``np.cov`` of the transposed
        feature matrix.
    """
    n, c, w, h = ims.shape
    n_batches = int(math.ceil(float(n) / float(batch_size)))
    xp = model.xp

    print('Batch size:', batch_size)
    print('Total number of images:', n)
    print('Total number of batches:', n_batches)

    ys = xp.empty((n, 2048), dtype=xp.float32)
    needs_resize = (w, h) != (299, 299)

    for batch_idx in range(n_batches):
        print('Running batch', batch_idx + 1, '/', n_batches, '...')
        start = batch_idx * batch_size
        stop = min(start + batch_size, n)

        # To GPU if using CuPy.
        ims_batch = Variable(xp.asarray(ims[start:stop]))

        # Resize to the shape expected by the inception module.
        if needs_resize:
            ims_batch = F.resize_images(ims_batch, (299, 299))  # bilinear

        # Inference mode, no graph construction.
        with chainer.using_config('train', False), \
                chainer.using_config('enable_backprop', False):
            y = model(ims_batch, get_feature=True)
        ys[start:stop] = y.data

    mean = chainer.cuda.to_cpu(xp.mean(ys, axis=0))
    # The covariance is computed on the host with NumPy.
    cov = np.cov(chainer.cuda.to_cpu(ys).T)
    return mean, cov
def forward(self, x1, x2):
    """Fuse two feature maps by channel concatenation.

    Each input goes through its own conv -> batch norm -> ReLU. The
    second result is resized to the spatial size of the first before the
    two are concatenated along axis 1.
    """
    first = F.relu(self.bn1(self.conv1(x1)))
    second = F.relu(self.bn2(self.conv2(x2)))
    second = F.resize_images(second, (first.shape[2], first.shape[3]))
    return F.concat((first, second), axis=1)
def _hand_estimate_chainer_backend_each(self, hand_bgr, cx, cy, left_hand):
    """Estimate hand keypoints from a cropped hand image.

    Left hands are mirrored before inference and the resulting heatmaps
    are mirrored back. Every heatmap except the last is smoothed with a
    Gaussian and its maximum is taken as the keypoint. GPU transfers are
    done under the device context of ``self.hand_net``.

    Args:
        hand_bgr: Cropped hand image indexed as ``(height, width, channel)``.
        cx: x coordinate the crop is centred on.
        cy: y coordinate the crop is centred on.
        left_hand: Whether the crop shows a left hand.

    Returns:
        List of dicts with keys ``'x'``, ``'y'``, ``'score'`` and
        ``'limb'``, one per smoothed heatmap.
    """
    xp = self.hand_net.xp
    device_id = self.hand_net._device_id

    if left_hand:
        # flipCode 1 mirrors left/right (flip around the vertical axis).
        hand_bgr = cv2.flip(hand_bgr, 1)

    resized = cv2.resize(hand_bgr, (368, 368), interpolation=cv2.INTER_CUBIC)
    # HWC -> NCHW, scaled to [-0.5, 0.5).
    x = np.array(resized[np.newaxis], dtype=np.float32)
    x = x.transpose(0, 3, 1, 2)
    x = x / 256 - 0.5

    if self.gpu >= 0:
        with chainer.cuda.get_device_from_id(device_id):
            x = chainer.cuda.to_gpu(x)
    x = chainer.Variable(x)

    heatmaps = self.hand_net(x)
    # Last stage only, resized back to the crop's size.
    heatmaps = F.resize_images(heatmaps[-1], hand_bgr.shape[:2])[0]
    if self.gpu >= 0:
        heatmaps.to_cpu()
    heatmaps = heatmaps.array

    if left_hand:
        # Undo the mirroring; cv2.flip expects HWC.
        heatmaps = heatmaps.transpose(1, 2, 0)
        heatmaps = cv2.flip(heatmaps, 1)
        heatmaps = heatmaps.transpose(2, 0, 1)

    # Smooth every heatmap except the last one.
    hmaps = []
    if xp == np:
        # cpu
        for i in range(heatmaps.shape[0] - 1):
            hmaps.append(gaussian_filter(heatmaps[i], sigma=self.hand_gaussian_sigma))
    else:
        # NOTE(review): original indentation was lost; the convolution is
        # kept inside the device context together with the transfer -
        # confirm.
        with chainer.cuda.get_device_from_id(device_id):
            heatmaps = chainer.cuda.to_gpu(heatmaps)
            heatmaps = F.convolution_2d(
                heatmaps[:, xp.newaxis], self.hand_gaussian_kernel,
                stride=1, pad=int(self.hand_gaussian_ksize / 2))
            heatmaps = chainer.cuda.to_cpu(xp.squeeze(heatmaps.array))
        hmaps.extend(heatmaps[:-1])

    keypoints = []
    idx_offset = 0
    if left_hand:
        idx_offset += len(hmaps)

    for i, heatmap in enumerate(hmaps):
        conf = heatmap.max()
        # First maximum in row-major order. Flattening np.where() output
        # would mix row and column indices when the maximum is tied.
        peak_y, peak_x = np.unravel_index(np.argmax(heatmap), heatmap.shape)
        py = cy + int(peak_y) - hand_bgr.shape[0] / 2
        px = cx + int(peak_x) - hand_bgr.shape[1] / 2
        keypoints.append({'x': px, 'y': py, 'score': conf,
                          'limb': self.index2handname[idx_offset + i]})
    return keypoints
def __call__(self, orig_img, fast_mode=False):
    """Detect poses in ``orig_img`` by averaging over inference scales.

    Args:
        orig_img: Input image (indexed as rows, columns, channels).
        fast_mode: If true, only the first entry of
            ``params['inference_scales']`` is used.

    Returns:
        The array produced by ``self.subsets_to_person_pose_array``, or an
        empty array of shape ``(0, len(JointType), 3)`` if no peak is found.
    """
    orig_img_h, orig_img_w, _ = orig_img.shape
    out_w, out_h = self.compute_optimal_size(orig_img, params['heatmap_size'])

    # use only the first scale on fast mode
    if fast_mode:
        scales = [params['inference_scales'][0]]
    else:
        scales = params['inference_scales']

    pafs_sum = 0
    heatmaps_sum = 0
    for scale in scales:
        print("Inference scale: %.1f..." % (scale))
        img_size = int(params['inference_img_size'] * scale)
        in_w, in_h = self.compute_optimal_size(orig_img, img_size)

        resized_image = cv2.resize(orig_img, (in_w, in_h))
        batch = np.array(resized_image[np.newaxis], dtype=np.float32)
        x_data = batch.transpose(0, 3, 1, 2) / 256 - 0.5
        if self.device >= 0:
            x_data = cuda.to_gpu(x_data)

        h1s, h2s = self.model(x_data)

        # Accumulate the last output of each branch at a common resolution.
        pafs_sum += F.resize_images(h1s[-1], (out_h, out_w)).data[0]
        heatmaps_sum += F.resize_images(h2s[-1], (out_h, out_w)).data[0]

    num_scales = len(scales)
    pafs = pafs_sum / num_scales
    heatmaps = heatmaps_sum / num_scales

    if self.device >= 0:
        pafs = cuda.to_cpu(pafs)

    all_peaks = self.compute_peaks_from_heatmaps(heatmaps)
    all_peaks_flatten = np.array(
        [peak for peaks_each_category in all_peaks for peak in peaks_each_category])
    if len(all_peaks_flatten) == 0:
        return np.empty((0, len(JointType), 3))

    all_connections = self.compute_connections(
        pafs, all_peaks, all_peaks_flatten, out_w, params)
    subsets = self.grouping_key_points(all_connections, all_peaks_flatten, params)

    # Rescale the first two peak columns to the original image size.
    all_peaks_flatten[:, 0] *= orig_img_w / out_w
    all_peaks_flatten[:, 1] *= orig_img_h / out_h

    return self.subsets_to_person_pose_array(subsets, all_peaks_flatten)
def __call__(self, face_img, fast_mode=False):
    """Detect face keypoints in ``face_img``.

    Args:
        face_img: Input image (indexed as rows, columns, channels).
        fast_mode: Accepted but not used by this method.

    Returns:
        Whatever ``self.compute_peaks_from_heatmaps`` returns for the
        heatmaps resized to the input image size.
    """
    face_img_h, face_img_w, _ = face_img.shape

    side = params["face_inference_img_size"]
    resized_image = cv2.resize(face_img, (side, side))

    batch = np.array(resized_image[np.newaxis], dtype=np.float32)
    x_data = batch.transpose(0, 3, 1, 2) / 256 - 0.5
    if self.device >= 0:
        x_data = cuda.to_gpu(x_data)

    outputs = self.model(x_data)

    # Only the last output is used, resized back to the input resolution.
    resized = F.resize_images(outputs[-1], (face_img_h, face_img_w))
    heatmaps = resized.data[0]

    return self.compute_peaks_from_heatmaps(heatmaps)
def _pose_estimate_chainer_backend(self, bgr_img):
    """Estimate multi-person poses in ``bgr_img`` with ``self.pose_net``.

    Steps, in order:

    1. Run the network at every scale in ``self.scales`` and average the
       resized heatmaps and PAFs.
    2. Pick heatmap peaks: pixels not smaller than their four neighbours
       and greater than ``self.thre1``.
    3. For every limb in ``self.limb_sequence``, score all candidate pairs
       from PAF values sampled at ``mid_num`` points between the two peaks
       and greedily keep the best non-conflicting connections.
    4. Assemble connections into per-person rows and drop rows with fewer
       than 4 parts or a mean score below 0.4.

    Args:
        bgr_img: Input image (indexed as rows, columns, channels). It is
            resized to ``(self.width, self.height)`` when both are set.

    Returns:
        ``(self._extract_joint_position(joint_cands_indices, candidate),
        all_peaks)``, or ``([], [])`` when no limb has candidates at both
        of its ends.
    """
    if self.gpu >= 0:
        chainer.cuda.get_device_from_id(self.gpu).use()
    xp = self.pose_net.xp

    # NOTE(review): org_h / org_w are not used again in this function.
    org_h, org_w, _ = bgr_img.shape
    if not (self.width is None or self.height is None):
        bgr_img = cv2.resize(bgr_img, (self.width, self.height))

    # Accumulators for the scale-averaged network outputs.
    heatmap_avg = xp.zeros((bgr_img.shape[0], bgr_img.shape[1], 19),
                           dtype=np.float32)
    paf_avg = xp.zeros((bgr_img.shape[0], bgr_img.shape[1], 38),
                       dtype=np.float32)

    for scale in self.scales:
        img = cv2.resize(bgr_img, (0, 0), fx=scale,
                         fy=scale, interpolation=cv2.INTER_CUBIC)
        padded_img, pad = padRightDownCorner(
            img, self.stride, self.pad_value)
        # Add a leading batch axis, reorder the axes and normalize.
        x = np.transpose(np.float32(
            padded_img[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
        if self.gpu >= 0:
            x = chainer.cuda.to_gpu(x)
        x = chainer.Variable(x)
        pafs, heatmaps = self.pose_net(x)
        # Only the last element of each output sequence is used.
        paf = pafs[-1]
        heatmap = heatmaps[-1]

        # extract outputs, resize, and remove padding
        heatmap = F.resize_images(
            heatmap, (heatmap.data.shape[2] * self.stride, heatmap.data.shape[3] * self.stride))
        heatmap = heatmap[:, :, :padded_img.shape[0] -
                          pad[2], :padded_img.shape[1] - pad[3]]
        heatmap = F.resize_images(
            heatmap, (bgr_img.shape[0], bgr_img.shape[1]))
        heatmap = xp.transpose(xp.squeeze(heatmap.data), (1, 2, 0))
        paf = F.resize_images(
            paf, (paf.data.shape[2] * self.stride, paf.data.shape[3] * self.stride))
        paf = paf[:, :, :padded_img.shape[0] -
                  pad[2], :padded_img.shape[1] - pad[3]]
        paf = F.resize_images(paf, (bgr_img.shape[0], bgr_img.shape[1]))
        paf = xp.transpose(xp.squeeze(paf.data), (1, 2, 0))

        # Every scale contributes equally to the average.
        coeff = 1.0 / len(self.scales)
        paf_avg += paf * coeff
        heatmap_avg += heatmap * coeff

    # Copies of the averaged heatmap shifted by one pixel in each direction
    # along the first two axes (zero-filled at the border), used for the
    # local-maximum test below.
    heatmav_left = xp.zeros_like(heatmap_avg)
    heatmav_left[1:, :] = heatmap_avg[:-1, :]
    heatmav_right = xp.zeros_like(heatmap_avg)
    heatmav_right[:-1, :] = heatmap_avg[1:, :]
    heatmav_up = xp.zeros_like(heatmap_avg)
    heatmav_up[:, 1:] = heatmap_avg[:, :-1]
    heatmav_down = xp.zeros_like(heatmap_avg)
    heatmav_down[:, :-1] = heatmap_avg[:, 1:]
    # A peak is >= all four shifted copies and above the threshold.
    peaks_binary = (heatmap_avg >= heatmav_left) & \
                   (heatmap_avg >= heatmav_right) & \
                   (heatmap_avg >= heatmav_up) & \
                   (heatmap_avg >= heatmav_down) & \
                   (heatmap_avg > self.thre1)

    # The last heatmap channel is excluded from the peak search.
    peaks = xp.array(xp.nonzero(
        peaks_binary[..., :len(self.index2limbname)-1]), dtype=np.int32).T
    peak_counter = peaks.shape[0]
    # Columns: [axis-1 index, axis-0 index, score, peak id].
    all_peaks = xp.zeros((peak_counter, 4), dtype=np.float32)
    all_peaks[:, 0] = peaks[:, 1]
    all_peaks[:, 1] = peaks[:, 0]
    all_peaks[:, 2] = heatmap_avg[peaks.T.tolist()]
    # Heatmap channel of each peak; peaks are sorted and grouped by it.
    peaks_order = peaks[..., 2]
    try:
        all_peaks = all_peaks[xp.argsort(peaks_order)]
    except AttributeError:
        # cupy.argsort is not available at cupy==1.0.1
        peaks_order = chainer.cuda.to_cpu(peaks_order)
        all_peaks = all_peaks[np.argsort(peaks_order)]
    all_peaks[:, 3] = xp.arange(peak_counter, dtype=np.float32)
    if self.gpu >= 0:
        all_peaks = chainer.cuda.to_cpu(all_peaks)
        peaks_order = chainer.cuda.to_cpu(peaks_order)
    # Split the sorted peaks into one array per heatmap channel.
    all_peaks = np.split(all_peaks, np.cumsum(
        np.bincount(peaks_order, minlength=len(self.index2limbname)-1)))

    connection_all = []
    mid_num = 10  # number of sample points along each candidate limb
    eps = 1e-8  # avoids division by zero for coincident peaks
    # PAF channel pair of every limb (map_idx entries are offset by 19).
    score_mid = paf_avg[:, :, [[x - 19 for x in self.map_idx[k]]
                               for k in range(len(self.map_idx))]]

    # Candidate peaks at both ends of every limb in limb_sequence.
    cands = np.array(all_peaks, dtype=object)[
        np.array(self.limb_sequence, dtype=np.int32) - 1]
    candAs = cands[:, 0]
    candBs = cands[:, 1]
    nAs = np.array([len(candA) for candA in candAs])
    nBs = np.array([len(candB) for candB in candBs])
    # Only limbs with at least one candidate at each end can be connected.
    target_indices = np.nonzero(np.logical_and(nAs != 0, nBs != 0))[0]
    if len(target_indices) == 0:
        return [], []

    # Expand to all (A, B) pairs per limb: A rows are repeated, B rows are
    # tiled, so row i * nB + j pairs candidate A[i] with candidate B[j].
    all_candidates_A = [np.repeat(np.array(tmp_candA, dtype=np.float32), nB, axis=0)
                        for tmp_candA, nB in zip(candAs, nBs)]
    all_candidates_B = [np.tile(np.array(tmp_candB, dtype=np.float32), (nA, 1))
                        for tmp_candB, nA in zip(candBs, nAs)]

    target_candidates_B = [all_candidates_B[index]
                           for index in target_indices]
    target_candidates_A = [all_candidates_A[index]
                           for index in target_indices]

    # Unit direction vector from A to B for every candidate pair.
    vec = np.vstack(target_candidates_B)[
        :, :2] - np.vstack(target_candidates_A)[:, :2]
    if self.gpu >= 0:
        vec = chainer.cuda.to_gpu(vec)
    norm = xp.sqrt(xp.sum(vec ** 2, axis=1)) + eps
    vec = vec / norm[:, None]
    # Integer sample coordinates along each pair plus the limb index, used
    # together to index score_mid.
    start_end = zip(np.round(np.mgrid[np.vstack(target_candidates_A)[:, 1].reshape(-1, 1):np.vstack(target_candidates_B)[:, 1].reshape(-1, 1):(mid_num * 1j)]).astype(np.int32),
                    np.round(np.mgrid[np.vstack(target_candidates_A)[:, 0].reshape(-1, 1):np.vstack(
                        target_candidates_B)[:, 0].reshape(-1, 1):(mid_num * 1j)]).astype(np.int32),
                    np.concatenate([[[index] * mid_num for i in range(len(c))] for index, c in zip(target_indices, target_candidates_B)]),)

    v = score_mid[np.concatenate(
        start_end, axis=1).tolist()].reshape(-1, mid_num, 2)
    # Projection of the sampled PAF vectors onto the pair direction.
    score_midpts = xp.sum(
        v * xp.repeat(vec, (mid_num), axis=0).reshape(-1, mid_num, 2), axis=2)
    # Mean projection plus a non-positive term that penalizes pairs longer
    # than half of the image's first dimension.
    score_with_dist_prior = xp.sum(score_midpts, axis=1) / mid_num + \
        xp.minimum(0.5 * bgr_img.shape[0] / norm - 1,
                   xp.zeros_like(norm, dtype=np.float32))
    # Keep pairs where more than 80% of the samples exceed thre2 and the
    # prior-adjusted score is positive.
    c1 = xp.sum(score_midpts > self.thre2, axis=1) > 0.8 * mid_num
    c2 = score_with_dist_prior > 0.0
    criterion = xp.logical_and(c1, c2)

    # Bin edges that map a flat pair index back to its limb index.
    indices_bins = np.cumsum(nAs * nBs)
    indices_bins = np.concatenate(
        [np.zeros(1), indices_bins]).astype(np.int32)
    target_candidate_indices = xp.nonzero(criterion)[0]
    if self.gpu >= 0:
        target_candidate_indices = chainer.cuda.to_cpu(
            target_candidate_indices)
        score_with_dist_prior = chainer.cuda.to_cpu(score_with_dist_prior)

    # Recover (limb k, candidate A index i, candidate B index j).
    k_s = np.digitize(target_candidate_indices, indices_bins) - 1
    i_s = (target_candidate_indices - (indices_bins[k_s])) // nBs[k_s]
    j_s = (target_candidate_indices - (indices_bins[k_s])) % nBs[k_s]

    # Columns: [k, i, j, pair score, pair score + both peak scores].
    connection_candidate = np.concatenate([k_s.reshape(-1, 1),
                                           i_s.reshape(-1, 1),
                                           j_s.reshape(-1, 1),
                                           score_with_dist_prior[
                                               target_candidate_indices][None, ].T,
                                           (score_with_dist_prior[target_candidate_indices][None, ] +
                                            np.concatenate(target_candidates_A)[target_candidate_indices, 2] +
                                            np.concatenate(target_candidates_B)[target_candidate_indices, 2]).T], axis=1)

    # Order by limb index first, then by descending pair score.
    sorted_indices = np.argsort(
        connection_candidate[:, 0] * 100 - connection_candidate[:, 3])

    connection_all = []
    for _ in range(0, 19):
        connection = np.zeros((0, 5), dtype=np.float32)
        connection_all.append(connection)

    # Greedy assignment: a candidate is accepted only if neither of its
    # endpoints is already used by a connection of the same limb.
    for c_candidate in connection_candidate[sorted_indices]:
        k, i, j = c_candidate[0:3].astype(np.int32)
        score = c_candidate[3]
        if(len(connection_all[k]) >= min(nAs[k], nBs[k])):
            continue
        # Row offset of candidate A in the repeated all_candidates_A array.
        i *= nBs[k]
        if(i not in connection_all[k][:, 3] and j not in connection_all[k][:, 4]):
            connection_all[k] = np.vstack([connection_all[k], np.array(
                [all_candidates_A[k][i][3], all_candidates_B[k][j][3], score, i, j], dtype=np.float32)])

    # One row per person: 18 peak-id slots (-1 = unset), then the
    # accumulated score at [-2] and the part count at [-1].
    joint_cands_indices = -1 * np.ones((0, 20))
    candidate = np.array(
        [item for sublist in all_peaks for item in sublist])
    for k in range(len(self.map_idx)):
        partAs = connection_all[k][:, 0]
        partBs = connection_all[k][:, 1]
        indexA, indexB = np.array(self.limb_sequence[k]) - 1
        for i in range(len(connection_all[k])):  # = 1:size(temp,1)
            found = 0
            joint_cands_indices_idx = [-1, -1]
            # 1:size(joint_cands_indices,1):
            for j in range(len(joint_cands_indices)):
                if joint_cands_indices[j][indexA] == float(partAs[i]) or joint_cands_indices[j][indexB] == float(partBs[i]):
                    joint_cands_indices_idx[found] = j
                    found += 1

            if found == 1:
                j = joint_cands_indices_idx[0]
                if(joint_cands_indices[j][indexB] != float(partBs[i])):
                    joint_cands_indices[j][indexB] = partBs[i]
                    joint_cands_indices[j][-1] += 1
                    joint_cands_indices[
                        j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
                    # NOTE(review): the statement below repeats the previous
                    # one verbatim, so the score is added twice here, while
                    # the "as like found == 1" branch further down adds it
                    # once. Possibly an accidental duplicate; its nesting
                    # level is also uncertain - confirm before changing.
                    joint_cands_indices[
                        j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
            elif found == 2:  # if found 2 and disjoint, merge them
                j1, j2 = joint_cands_indices_idx
                membership = ((joint_cands_indices[j1] >= 0).astype(
                    int) + (joint_cands_indices[j2] >= 0).astype(int))[:-2]
                if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                    # Unset slots hold -1, so adding (other + 1) copies the
                    # other row's entries into this row's empty slots.
                    joint_cands_indices[j1][
                        :-2] += (joint_cands_indices[j2][:-2] + 1)
                    joint_cands_indices[
                        j1][-2:] += joint_cands_indices[j2][-2:]
                    joint_cands_indices[j1][-2] += connection_all[k][i][2]
                    joint_cands_indices = np.delete(
                        joint_cands_indices, j2, 0)
                else:  # as like found == 1
                    joint_cands_indices[j1][indexB] = partBs[i]
                    joint_cands_indices[j1][-1] += 1
                    joint_cands_indices[
                        j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]

            # if find no partA in the joint_cands_indices, create a new
            # joint_cands_indices
            elif not found and k < len(self.index2limbname) - 2:
                row = -1 * np.ones(20)
                row[indexA] = partAs[i]
                row[indexB] = partBs[i]
                row[-1] = 2
                row[-2] = sum(candidate[connection_all[k]
                                        [i, :2].astype(int), 2]) + connection_all[k][i][2]
                joint_cands_indices = np.vstack([joint_cands_indices, row])

    # delete some rows of joint_cands_indices which has few parts occur
    deleteIdx = []
    for i in range(len(joint_cands_indices)):
        if joint_cands_indices[i][-1] < 4 or joint_cands_indices[i][-2] / joint_cands_indices[i][-1] < 0.4:
            deleteIdx.append(i)
    joint_cands_indices = np.delete(joint_cands_indices, deleteIdx, axis=0)

    return self._extract_joint_position(joint_cands_indices, candidate), all_peaks
def forward(self, inputs, device):
    """Resize the single input to the trailing part of ``self.output_shape``.

    Args:
        inputs: One-element sequence holding the array to resize.
        device: Not used by this method.

    Returns:
        One-element tuple with the resized result.
    """
    (x,) = inputs
    # Everything from index 2 onward of output_shape is the target size.
    target_size = self.output_shape[2:]
    return (functions.resize_images(x, target_size),)
def f(x):
    """Resize ``x`` to ``output_shape`` taken from the enclosing scope."""
    resized = functions.resize_images(x, output_shape)
    return resized
def predict_depth(self, rgb, mask_score, depth_viz, rgb_pool5):
    """Compute the depth score map from the depth visualization.

    Args:
        rgb: RGB input; only its axis-2 and axis-3 sizes are read, to crop
            the final score.
        mask_score: Mask class scores, used when ``self.masking`` is True
            to zero out depth features wherever the predicted class is not
            0. If ``None``, ``self.mask_score`` is used instead (the only
            source the previous implementation consulted).
        depth_viz: Input to the depth convolution stack.
        rgb_pool5: RGB features concatenated with the (masked) depth
            ``pool5`` features when ``self.concat`` is True.

    Returns:
        The depth score, cropped to the axis-2 and axis-3 sizes of ``rgb``.
    """
    # conv_depth_1
    h = F.relu(self.conv_depth_1_1(depth_viz))
    h = F.relu(self.conv_depth_1_2(h))
    h = F.max_pooling_2d(h, 2, stride=2, pad=0)
    depth_pool1 = h  # 1/2

    # conv_depth_2
    h = F.relu(self.conv_depth_2_1(depth_pool1))
    h = F.relu(self.conv_depth_2_2(h))
    h = F.max_pooling_2d(h, 2, stride=2, pad=0)
    depth_pool2 = h  # 1/4

    # conv_depth_3
    h = F.relu(self.conv_depth_3_1(depth_pool2))
    h = F.relu(self.conv_depth_3_2(h))
    h = F.relu(self.conv_depth_3_3(h))
    h = F.max_pooling_2d(h, 2, stride=2, pad=0)
    depth_pool3 = h  # 1/8

    # conv_depth_4
    h = F.relu(self.conv_depth_4_1(depth_pool3))
    h = F.relu(self.conv_depth_4_2(h))
    h = F.relu(self.conv_depth_4_3(h))
    h = F.max_pooling_2d(h, 2, stride=2, pad=0)
    depth_pool4 = h  # 1/16

    # conv_depth_5
    h = F.relu(self.conv_depth_5_1(depth_pool4))
    h = F.relu(self.conv_depth_5_2(h))
    h = F.relu(self.conv_depth_5_3(h))
    h = F.max_pooling_2d(h, 2, stride=2, pad=0)
    depth_pool5 = h  # 1/32

    if self.masking is True:
        # Apply negative_mask to depth_pool5.
        # Use the score passed by the caller; the argument used to be
        # ignored in favour of the attribute, which remains the fallback.
        score = self.mask_score if mask_score is None else mask_score
        # (N, C, H, W) -> (N, H, W)
        mask_pred_tmp = F.argmax(score, axis=1)
        # (N, H, W) -> (N, 1, H, W), float required for resizing
        mask_pred_tmp = mask_pred_tmp[:, None, :, :].data.astype(
            self.xp.float32)  # 1/1
        resized_mask_pred = F.resize_images(
            mask_pred_tmp,
            (depth_pool5.shape[2], depth_pool5.shape[3]))  # 1/32
        # Keep features only where the resized prediction is exactly 0.
        keep = (resized_mask_pred.data == 0.0).astype(self.xp.float32)
        masked_depth_pool5 = depth_pool5 * keep
    else:
        masked_depth_pool5 = depth_pool5

    if self.concat is True:
        # concatenate rgb_pool5 and depth_pool5
        concat_pool5 = F.concat((rgb_pool5, masked_depth_pool5), axis=1)

        # concat_fc6
        h = F.relu(self.concat_fc6(concat_pool5))
        h = F.dropout(h, ratio=.5)
        concat_fc6 = h  # 1/32
    else:
        # concat_fc6
        h = F.relu(self.depth_fc6(masked_depth_pool5))
        h = F.dropout(h, ratio=.5)
        concat_fc6 = h  # 1/32

    # concat_fc7
    h = F.relu(self.concat_fc7(concat_fc6))
    h = F.dropout(h, ratio=.5)
    concat_fc7 = h  # 1/32

    # depth_score_fr
    h = self.depth_score_fr(concat_fc7)
    depth_score_fr = h  # 1/32

    # depth_score_pool3
    scale_depth_pool3 = 0.0001 * depth_pool3
    h = self.depth_score_pool3(scale_depth_pool3)
    depth_score_pool3 = h  # 1/8

    # depth_score_pool4
    scale_depth_pool4 = 0.01 * depth_pool4
    h = self.depth_score_pool4(scale_depth_pool4)
    depth_score_pool4 = h  # 1/16

    # depth upscore2
    h = self.depth_upscore2(depth_score_fr)
    depth_upscore2 = h  # 1/16

    # depth_score_pool4c: crop pool4 scores to the upscore2 size
    h = depth_score_pool4[:, :,
                          5:5 + depth_upscore2.data.shape[2],
                          5:5 + depth_upscore2.data.shape[3]]
    depth_score_pool4c = h  # 1/16

    # depth_fuse_pool4
    h = depth_upscore2 + depth_score_pool4c
    depth_fuse_pool4 = h  # 1/16

    # depth_upscore_pool4
    h = self.depth_upscore_pool4(depth_fuse_pool4)
    depth_upscore_pool4 = h  # 1/8

    # depth_score_pool3c: crop pool3 scores to the upscore_pool4 size
    h = depth_score_pool3[:, :,
                          9:9 + depth_upscore_pool4.data.shape[2],
                          9:9 + depth_upscore_pool4.data.shape[3]]
    depth_score_pool3c = h  # 1/8

    # depth_fuse_pool3
    h = depth_upscore_pool4 + depth_score_pool3c
    depth_fuse_pool3 = h  # 1/8

    # depth_upscore8
    h = self.depth_upscore8(depth_fuse_pool3)
    depth_upscore8 = h  # 1/1

    # depth_score: crop back to the size of rgb
    h = depth_upscore8[:, :,
                       31:31 + rgb.shape[2],
                       31:31 + rgb.shape[3]]
    depth_score = h  # 1/1

    return depth_score
def f(x):
    """Resize ``x`` to ``output_shape`` and return its elementwise square."""
    resized = functions.resize_images(x, output_shape)
    return resized * resized
def check_forward(self, x, output_shape):
    """Assert that resizing ``x`` to ``output_shape`` yields ``self.out``."""
    resized = functions.resize_images(x, output_shape)
    actual = resized.data
    testing.assert_allclose(actual, self.out)