def post_process(outputs, varss, boxes, keypoints, kk, dic_gt=None, iou_min=0.3):
    """Post process monoloco to output final dictionary with all information for visualizations"""

    dic_out = defaultdict(list)
    if outputs is None:
        return dic_out

    if dic_gt:
        boxes_gt, dds_gt = dic_gt['boxes'], dic_gt['dds']
        matches = get_iou_matches(boxes, boxes_gt, thresh=iou_min)
        print("found {} matches with ground-truth".format(len(matches)))
    else:
        matches = [(idx, idx) for idx, _ in enumerate(boxes)]  # Replicate boxes

    matches = reorder_matches(matches, boxes, mode='left_right')
    uv_shoulders = get_keypoints(keypoints, mode='shoulder')
    uv_centers = get_keypoints(keypoints, mode='center')
    xy_centers = pixel_to_camera(uv_centers, kk, 1)

    # Match with ground truth if available
    for idx, idx_gt in matches:
        dd_pred = float(outputs[idx][0])
        ale = float(outputs[idx][1])
        var_y = float(varss[idx])
        dd_real = dds_gt[idx_gt] if dic_gt else dd_pred

        kps = keypoints[idx]
        box = boxes[idx]
        uu_s, vv_s = uv_shoulders.tolist()[idx][0:2]
        uu_c, vv_c = uv_centers.tolist()[idx][0:2]
        uv_shoulder = [round(uu_s), round(vv_s)]
        uv_center = [round(uu_c), round(vv_c)]
        xyz_real = xyz_from_distance(dd_real, xy_centers[idx])
        xyz_pred = xyz_from_distance(dd_pred, xy_centers[idx])

        dic_out['boxes'].append(box)
        dic_out['boxes_gt'].append(
            boxes_gt[idx_gt] if dic_gt else boxes[idx])
        dic_out['dds_real'].append(dd_real)
        dic_out['dds_pred'].append(dd_pred)
        dic_out['stds_ale'].append(ale)
        dic_out['stds_epi'].append(var_y)
        dic_out['xyz_real'].append(xyz_real.squeeze().tolist())
        dic_out['xyz_pred'].append(xyz_pred.squeeze().tolist())
        dic_out['uv_kps'].append(kps)
        dic_out['uv_centers'].append(uv_center)
        dic_out['uv_shoulders'].append(uv_shoulder)

    return dic_out

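# A minimal, hypothetical sketch (not the repo's actual helper) of how a function like
# xyz_from_distance could back-project a predicted distance into 3-D, assuming the
# distance is measured along the viewing ray through the normalized center (x, y, 1).
# The name xyz_from_distance_sketch and the sample numbers are illustrative only.
import torch

def xyz_from_distance_sketch(distance, xy_center):
    """distance: meters from camera to person; xy_center: [x, y, 1] at z = 1."""
    xy_center = torch.as_tensor(xy_center, dtype=torch.float32)
    ray = xy_center / torch.norm(xy_center)   # unit vector along the viewing ray
    return distance * ray                     # [X, Y, Z] in camera coordinates

# e.g. a person 8.5 m away whose center projects to (0.1, -0.05) at z = 1
print(xyz_from_distance_sketch(8.5, [0.1, -0.05, 1.0]))
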
def get_catalog_data(self, compute_descriptors=True):
    iterator = self.catalog_images_paths
    if self.verbose:
        iterator = tqdm(iterator, desc="Get catalog data")

    self.catalog_data = {
        "keypoints": [],
        "descriptors": [],
        "labels": [],
        "shapes": [],
    }
    for catalog_path in iterator:
        for width in self.catalog_image_widths:
            img = utils.read_image(catalog_path, width=width)
            label = catalog_path.split("/")[-1][:-4]
            keypoints = utils.get_keypoints(img, self.catalog_keypoint_stride,
                                            self.keypoint_sizes)

            self.catalog_data["keypoints"] += list(keypoints)
            self.catalog_data["labels"] += [label] * len(keypoints)
            self.catalog_data["shapes"] += [img.shape[:2]] * len(keypoints)

            if compute_descriptors:
                descriptors = utils.get_descriptors(
                    img, keypoints, self.feature_extractor)
                self.catalog_data["descriptors"] += list(descriptors)

    self.catalog_data["descriptors"] = np.array(
        self.catalog_data["descriptors"])

def predict_query(self, query_path, classifier=None, apply_threshold=True):
    # Read img
    query_img = utils.read_image(query_path, width=self.query_image_width)
    query_original_h, query_original_w = cv2.imread(query_path).shape[:2]

    # Get keypoints
    query_keypoints = utils.get_keypoints(query_img,
                                          self.query_keypoint_stride,
                                          self.keypoint_sizes)
    query_kpts_data = np.array(
        [utils.keypoint2data(kpt) for kpt in query_keypoints])

    # Get descriptors
    if self.verbose:
        print("Query description...")
    query_descriptors = utils.get_descriptors(query_img, query_keypoints,
                                              self.feature_extractor)

    # Matching
    self.get_matches_results(query_kpts_data, query_descriptors,
                             query_img.shape)

    # Get bboxes
    bboxes = self.get_raw_bboxes(query_kpts_data)
    bboxes = self.filter_bboxes(bboxes, query_img.shape)
    bboxes = self.merge_bboxes(bboxes, query_img.shape)
    if classifier is not None:
        bboxes = self.add_classifier_score(bboxes, query_img, classifier)
    if apply_threshold:
        bboxes = self.filter_bboxes_with_threshold(bboxes)
    bboxes = self.reshape_bboxes_original_size(
        bboxes, (query_original_h, query_original_w), query_img.shape[:2])

    return bboxes

def compute_image_features(self, image):
    keypoints = utils.get_keypoints(image, self.keypoint_stride,
                                    self.keypoint_sizes)
    descriptors = utils.get_descriptors(image, keypoints,
                                        self.feature_extractor)
    distances = sklearn_pairwise.pairwise_distances(descriptors,
                                                    self.vocab["features"],
                                                    metric="cosine")
    softmax_distances = np.exp(1. - distances) / np.sum(
        np.exp(1. - distances), axis=1, keepdims=True)
    features = 1. * np.sum(softmax_distances, axis=0) / len(softmax_distances) \
        * self.vocab["idf"]
    return features

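# A self-contained sketch of the soft-assignment pooling used above, under the
# assumption that vocab["features"] are the visual-word centroids and vocab["idf"]
# their idf weights. soft_bow_features and the random data below are hypothetical
# stand-ins, not part of the repo: each descriptor votes for every word with
# softmax(1 - cosine distance), votes are averaged and re-weighted by idf.
import numpy as np
from sklearn.metrics import pairwise as sklearn_pairwise

def soft_bow_features(descriptors, vocab_features, vocab_idf):
    distances = sklearn_pairwise.pairwise_distances(descriptors, vocab_features,
                                                    metric="cosine")
    similarities = np.exp(1. - distances)
    softmax_distances = similarities / similarities.sum(axis=1, keepdims=True)
    return softmax_distances.mean(axis=0) * vocab_idf

rng = np.random.default_rng(0)
descriptors = rng.normal(size=(50, 64))      # 50 local descriptors of dim 64
vocab_features = rng.normal(size=(10, 64))   # 10 visual words
vocab_idf = np.ones(10)                      # uniform idf for the example
print(soft_bow_features(descriptors, vocab_features, vocab_idf).shape)  # (10,)
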
def predict_query(self, query, score_threshold=None):
    if type(query) in [str, np.string_]:
        query_img = utils.read_image(query, size=self.image_size)
    else:
        query_img = cv2.resize(query, (self.image_size, self.image_size))
    query_keypoints = utils.get_keypoints(query_img, self.keypoint_stride,
                                          self.keypoint_sizes)
    query_descriptors = utils.get_descriptors(query_img, query_keypoints,
                                              self.feature_extractor)
    scores = self.get_query_scores(query_descriptors)
    return scores

def get_catalog_descriptors(self):
    iterator = self.catalog_images_paths
    if self.verbose:
        iterator = tqdm(iterator, desc="Catalog description")

    self.catalog_descriptors = []
    for path in iterator:
        img = utils.read_image(path, size=self.image_size)
        keypoints = utils.get_keypoints(img, self.keypoint_stride,
                                        self.keypoint_sizes)
        descriptors = utils.get_descriptors(img, keypoints,
                                            self.feature_extractor)
        self.catalog_descriptors.append(descriptors)

    self.catalog_descriptors = np.array(self.catalog_descriptors)
    self.catalog_descriptors = self.catalog_descriptors.reshape(
        -1, self.catalog_descriptors.shape[-1])

def preprocess_monoloco(keypoints, kk):
    """ Preprocess batches of inputs
    keypoints = torch tensors of (m, 3, 17) or list [3, 17]
    Outputs = torch tensors of (m, 34) in meters normalized (z=1)
    and zero-centered using the center of the box
    """
    if isinstance(keypoints, list):
        keypoints = torch.tensor(keypoints)
    if isinstance(kk, list):
        kk = torch.tensor(kk)

    # Projection in normalized image coordinates and zero-center with the center of the bounding box
    uv_center = get_keypoints(keypoints, mode='center')
    xy1_center = pixel_to_camera(uv_center, kk, 10)
    xy1_all = pixel_to_camera(keypoints[:, 0:2, :], kk, 10)
    kps_norm = xy1_all - xy1_center.unsqueeze(1)  # (m, 17, 3) - (m, 1, 3)
    kps_out = kps_norm[:, :, 0:2].reshape(kps_norm.size()[0], -1)  # no contiguous for view
    return kps_out

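# Hypothetical sketch of the kind of pixel-to-camera projection that pixel_to_camera
# performs above, assuming kk is the 3x3 camera intrinsic matrix and the rays are
# scaled to depth z_met. pixel_to_camera_sketch and the sample intrinsics are
# illustrative stand-ins, not the repo helper.
import torch

def pixel_to_camera_sketch(uv, kk, z_met=1.0):
    uv1 = torch.cat((uv, torch.ones_like(uv[..., :1])), dim=-1)   # homogeneous pixels
    xyz = (torch.inverse(kk) @ uv1.unsqueeze(-1)).squeeze(-1)     # normalized rays (z = 1)
    return xyz * z_met

kk = torch.tensor([[721.5, 0., 609.6],
                   [0., 721.5, 172.9],
                   [0., 0., 1.]])
uv = torch.tensor([[650.0, 180.0]])          # one pixel location
print(pixel_to_camera_sketch(uv, kk, 10.0))  # ray scaled to z = 10, as in the code above
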
def build_vocab(self):
    if not self.force_vocab_compute and os.path.exists(self.vocab_path):
        if self.verbose:
            print("Loading vocab...")
        with open(self.vocab_path, "rb") as f:
            self.vocab = pickle.load(f)
        if self.verbose:
            print("Vocab loaded !")
    else:
        iterator = self.catalog_images_paths
        if self.verbose:
            iterator = tqdm(iterator, desc="Vocab construction")
        descriptors = []
        image_ids = []
        for i, image_path in enumerate(iterator):
            image = utils.read_image(image_path, size=self.image_size)
            keypoints = utils.get_keypoints(image, self.keypoint_stride,
                                            self.keypoint_sizes)
            desc = utils.get_descriptors(image, keypoints,
                                         self.feature_extractor)
            descriptors += list(desc)
            image_ids += [i for _ in range(len(keypoints))]
        descriptors = np.array(descriptors)
        image_ids = np.array(image_ids)

        if self.verbose:
            print("KMeans step...")
        kmeans = MiniBatchKMeans(n_clusters=self.vocab_size,
                                 init_size=3 * self.vocab_size)
        clusters = kmeans.fit_predict(descriptors)

        if self.verbose:
            print("Computing idfs...")
        self.vocab = {}
        self.vocab["features"] = kmeans.cluster_centers_
        self.vocab["idf"] = np.zeros((self.vocab["features"].shape[0],))
        nb_documents = len(self.catalog_images_paths)
        for cluster in set(clusters):
            nb_documents_containing_cluster = len(set(image_ids[clusters == cluster]))
            self.vocab["idf"][cluster] = np.log(
                1. * nb_documents / nb_documents_containing_cluster)

        if self.verbose:
            print("Saving vocab...")
        with open(self.vocab_path, "wb") as f:
            pickle.dump(self.vocab, f)
        if self.verbose:
            print("Vocab saved !")

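# Toy illustration of the idf computation in build_vocab above; the cluster and
# image-id arrays are made-up data. A word that appears in every catalog image gets
# idf = log(3/3) = 0, while rarer words get larger weights.
import numpy as np

clusters = np.array([0, 1, 0, 2, 1, 0])      # cluster id of each descriptor
image_ids = np.array([0, 0, 1, 1, 2, 2])     # image each descriptor came from
nb_documents = 3
idf = np.zeros(4)
for cluster in set(clusters):
    nb_docs_with_cluster = len(set(image_ids[clusters == cluster]))
    idf[cluster] = np.log(nb_documents / nb_docs_with_cluster)
print(idf)  # word 0 appears in all 3 images -> 0.0; words 1 and 2 are rarer
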
import cv2
import matplotlib.pyplot as plt
import copy
import numpy as np

import PoseDatabase
import utils
from pytorch_openpose import model, util, body

database = PoseDatabase.PoseDatabase()
body_estimation = body.Body('model/body_pose_model.pth')

classes = [
    "bridge", "childs", "downwarddog", "mountain", "plank",
    "seatedforwardbend", "tree", "trianglepose", "warrior1", "warrior2"
]

for i in classes:
    print(i)
    oriImg = cv2.imread('images/' + i + '.jpg')  # B,G,R order
    candidate, subset = body_estimation(oriImg)
    database.add_vector(i, utils.get_keypoints(subset, candidate))

database.save_database('vectorDB.pkl')

def run(data):
    try:
        print(time.time())  # TODO find logger in
        multiscale = [1.0, 1.5, 2.0]
        batch_size, height, width = 1, 401, 401
        image_list = json.loads(data)
        pose_scoreslist = []
        pose_keypoint_scoreslist = []
        pose_keypoint_coordslist = []
        for i in range(1):
            if (i == 0):
                input_image = np.array(image_list['input_image1'], dtype=np.uint8)
            else:
                input_image = np.array(image_list['input_image2'], dtype=np.uint8)

            # Run the network at every scale and collect the outputs
            scale_outputs = []
            for i in range(len(multiscale)):
                scale = multiscale[i]
                cv_shape = (401, 401)
                cv_shape2 = (int(cv_shape[0] * scale), int(cv_shape[1] * scale))
                scale2 = cv_shape2[0] / 600
                input_img = cv2.resize(input_image, None, fx=scale2, fy=scale2)
                # input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB).astype(np.float32)
                input_img = cv2.copyMakeBorder(input_img, 0,
                                               cv_shape2[0] - input_img.shape[0], 0,
                                               cv_shape2[1] - input_img.shape[1],
                                               cv2.BORDER_CONSTANT,
                                               value=[127, 127, 127])
                scale_img = input_img
                imgs_batch = np.zeros(
                    (batch_size, int(scale * height), int(scale * width), 3))
                imgs_batch[0] = scale_img
                one_scale_output = sess.run(outputs[i],
                                            feed_dict={tf_img[i]: imgs_batch})
                scale_outputs.append([o[0] for o in one_scale_output])

            # Average the outputs over all scales
            sample_output = scale_outputs[0]
            for i in range(1, len(multiscale)):
                for j in range(len(sample_output)):
                    sample_output[j] += scale_outputs[i][j]
            for j in range(len(sample_output)):
                sample_output[j] /= len(multiscale)

            H = utils.compute_heatmaps(kp_maps=sample_output[0],
                                       short_offsets=sample_output[1])
            for i in range(17):
                H[:, :, i] = gaussian_filter(H[:, :, i], sigma=2)
            pred_kp = utils.get_keypoints(H)
            pred_skels = utils.group_skeletons(keypoints=pred_kp,
                                               mid_offsets=sample_output[2])
            pred_skels = [skel for skel in pred_skels if (skel[:, 2] > 0).sum() > 6]
            # print('Number of detected skeletons: {}'.format(len(pred_skels)))

            pose_scores = np.zeros(len(pred_skels))
            pose_keypoint_scores = np.zeros((len(pred_skels), 17))
            pose_keypoint_coords = np.zeros((len(pred_skels), 17, 2))
            for j in range(len(pred_skels)):
                score_sum = 0  # avoid shadowing the built-in sum
                for i in range(17):
                    score_sum += pred_skels[j][i][2] * 100
                    pose_keypoint_scores[j][i] = pred_skels[j][i][2] * 100
                    pose_keypoint_coords[j][i][0] = pred_skels[j][i][0]
                    pose_keypoint_coords[j][i][1] = pred_skels[j][i][1]
                pose_scores[j] = score_sum / 17

            # Convert numpy arrays to plain lists so the response is JSON serializable
            pose_scoreslist.append(pose_scores.tolist())
            pose_keypoint_scoreslist.append(pose_keypoint_scores.tolist())
            pose_keypoint_coordslist.append(pose_keypoint_coords.tolist())

        result = json.dumps({
            'pose_scores': pose_scoreslist,
            'keypoint_scores': pose_keypoint_scoreslist,
            'keypoint_coords': pose_keypoint_coordslist
        })
        # You can return any data type, as long as it is JSON serializable.
        return result
    except Exception as e:
        error = str(e)
        return error

import copy
import cv2

import PoseDatabase
import utils
from pytorch_openpose import model, util, body

body_estimation = body.Body('model/body_pose_model.pth')
# hand_estimation = Hand('model/hand_pose_model.pth')

test_image = 'images/mountain.png'
oriImg = cv2.imread(test_image)  # B,G,R order
candidate, subset = body_estimation(oriImg)
print(candidate.shape)
print(subset.shape)

database = PoseDatabase.PoseDatabase()
database.add_vector("mountain", utils.get_keypoints(subset, candidate))

img2 = 'images/anantasana.png'
oriImg2 = cv2.imread(img2)
candidate2, subset2 = body_estimation(oriImg2)
database.add_vector("anantasana", utils.get_keypoints(subset2, candidate2))

print(database.find_match(utils.get_keypoints(subset, candidate)))

canvas = copy.deepcopy(oriImg)
canvas = util.draw_bodypose(canvas, candidate, subset)

"""
# detect hand
hands_list = util.handDetect(candidate, subset, oriImg)