def cubemap2persp(self, img, FOV, THETA, PHI, Hd, Wd):
    #
    # THETA is the left/right angle, PHI is the up/down angle, both in degrees
    #
    img = self.cubemap2equirect(img, [2 * Hd, 4 * Hd])
    equ_h, equ_w = img.shape[:2]

    # equ_cx = (equ_w - 1) / 2.0
    # equ_cy = (equ_h - 1) / 2.0
    equ_cx = (equ_w) / 2.0
    equ_cy = (equ_h) / 2.0

    wFOV = FOV
    hFOV = float(Hd) / Wd * wFOV

    c_x = (Wd) / 2.0
    c_y = (Hd) / 2.0

    wangle = (180 - wFOV) / 2.0
    w_len = 2 * 1 * np.sin(np.radians(wFOV / 2.0)) / np.cos(
        np.radians(wFOV / 2.0))
    w_interval = w_len / (Wd)

    hangle = (180 - hFOV) / 2.0
    h_len = 2 * 1 * np.sin(np.radians(hFOV / 2.0)) / np.cos(
        np.radians(hFOV / 2.0))
    h_interval = h_len / (Hd)

    x_map = np.zeros([Hd, Wd], np.float32) + 1
    y_map = np.tile((np.arange(0, Wd) - c_x) * w_interval, [Hd, 1])
    z_map = -np.tile((np.arange(0, Hd) - c_y) * h_interval, [Wd, 1]).T
    D = np.sqrt(x_map**2 + y_map**2 + z_map**2)

    xyz = np.zeros([Hd, Wd, 3], np.float64)
    xyz[:, :, 0] = (1 / D * x_map)[:, :]
    xyz[:, :, 1] = (1 / D * y_map)[:, :]
    xyz[:, :, 2] = (1 / D * z_map)[:, :]

    y_axis = np.array([0.0, 1.0, 0.0], np.float32)
    z_axis = np.array([0.0, 0.0, 1.0], np.float32)
    [R1, _] = cv2.Rodrigues(z_axis * np.radians(THETA))
    [R2, _] = cv2.Rodrigues(np.dot(R1, y_axis) * np.radians(-PHI))

    xyz = xyz.reshape([Hd * Wd, 3]).T
    xyz = np.dot(R1, xyz)
    xyz = np.dot(R2, xyz).T

    lat = np.arcsin(xyz[:, 2] / 1)
    lon = np.zeros([Hd * Wd], np.float64)
    theta = np.arctan(xyz[:, 1] / xyz[:, 0])
    idx1 = xyz[:, 0] > 0
    idx2 = xyz[:, 1] > 0

    idx3 = ((1 - idx1) * idx2).astype(bool)
    idx4 = ((1 - idx1) * (1 - idx2)).astype(bool)

    lon[idx1] = theta[idx1]
    lon[idx3] = theta[idx3] + np.pi
    lon[idx4] = theta[idx4] - np.pi

    lon = lon.reshape([Hd, Wd]) / np.pi * 180
    lat = -lat.reshape([Hd, Wd]) / np.pi * 180
    lon = lon / 180 * equ_cx + equ_cx
    lat = lat / 90 * equ_cy + equ_cy

    # To return the equirectangular image with the region covered by the
    # perspective view marked:
    # for x in range(Wd):
    #     for y in range(Hd):
    #         cv2.circle(img, (int(lon[y, x]), int(lat[y, x])), 1, (0, 255, 0))
    # return img

    self.map_x = lon.astype(np.float32)
    self.map_y = lat.astype(np.float32)
    persp = cv2.remap(img,
                      lon.astype(np.float32),
                      lat.astype(np.float32),
                      cv2.INTER_CUBIC,
                      borderMode=cv2.BORDER_WRAP)

    return persp
def calibrate(self, np_image, x_coordinate, y_coordinate, world_z):
    assert world_z >= 0, f"[FATAL] aruco calibrate got invalid world_z ({x_coordinate},{y_coordinate},{world_z})"
    timer = time.time()
    if len(self.reference_image) == 0:
        self.reference_image = np_image
    opencv_image_gray = cv2.cvtColor(self.reference_image, cv2.COLOR_BGR2GRAY)
    # cv2.imshow("a", opencv_image_gray)
    # cv2.waitKey()
    (corners, detected_ids, rejected_image_points) = aruco.detectMarkers(opencv_image_gray, self.aruco_dict)
    # flatten OpenCV's nested output shapes
    corners = np.array(corners).reshape((len(detected_ids), 4, 2))
    detected_ids = np.array(detected_ids).reshape((len(detected_ids)))
    if len(detected_ids) <= 3:
        print("[WARNING] calibration found fewer than 4 markers")
    assert (len(detected_ids) >= 3), "Cannot work with 2 or fewer markers"

    # put all the coordinates into arrays understood by solvePnP
    marker_world_coordinates = None
    image_coordinates = None
    for i in range(len(detected_ids)):
        if i == 0:
            marker_world_coordinates = self.markers[detected_ids[i]]
            image_coordinates = corners[i]
        else:
            marker_world_coordinates = np.concatenate(
                (marker_world_coordinates, self.markers[detected_ids[i]]))
            image_coordinates = np.concatenate(
                (image_coordinates, corners[i]))

    # find extrinsic camera parameters
    error, r_vector, t_vector = cv2.solvePnP(marker_world_coordinates,
                                             image_coordinates,
                                             self.default_intrinsic_matrix,
                                             self.distortion)
    r_matrix, jac = cv2.Rodrigues(r_vector)
    r_matrix_inverse = np.linalg.inv(r_matrix)
    intrinsic_matrix_inverse = np.linalg.inv(self.default_intrinsic_matrix)

    # find the scaling factor by adjusting it until the calculated Z is very
    # close to world_z; the mathematically correct way didn't work ¯\_(ツ)_/¯
    scaling_factor = 940
    index = 0
    while True:
        pixel_coordinates = np.array([[x_coordinate, y_coordinate, 1]]).T
        pixel_coordinates = scaling_factor * pixel_coordinates
        xyz_c = intrinsic_matrix_inverse.dot(pixel_coordinates)
        xyz_c = xyz_c - t_vector
        world_coordinates = r_matrix_inverse.dot(xyz_c)
        index += 1
        if index > 1000:
            raise Exception(
                "aruco.py: scaling factor finding is taking longer than 1000 iterations")
        if world_coordinates[2][0] > world_z + 0.5:
            scaling_factor += 1
        elif world_coordinates[2][0] < world_z - 0.5:
            scaling_factor -= 1
        else:
            break

    # print("[INFO] Calibration took %.2f seconds" % (time.time() - timer))
    return np.array([
        world_coordinates[0][0], world_coordinates[1][0],
        world_coordinates[2][0]
    ])
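# The scaling factor that the loop above searches for can also be derived in
# closed form: with q = K^-1 * [u, v, 1]^T, the world point is R^-1 * (s*q - t),
# so its Z component is s*(r3 . q) - r3 . t, where r3 is the third row of R^-1.
# A minimal sketch using the same names as above (r_matrix_inverse,
# intrinsic_matrix_inverse, t_vector); an illustration, not a drop-in
# replacement for the search loop.
def closed_form_scaling_factor(u, v, world_z, intrinsic_matrix_inverse,
                               r_matrix_inverse, t_vector):
    q = intrinsic_matrix_inverse.dot(np.array([[u], [v], [1.0]]))
    r3 = r_matrix_inverse[2, :].reshape(1, 3)
    # solve  world_z = s * (r3 . q) - r3 . t  for s
    return (world_z + float(r3.dot(t_vector))) / float(r3.dot(q))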
rs = np.array(rs).sum(axis=2)
# r = np.array([np.average(rs[:,i,0]) for i in range(3)])
# rstd = np.array([np.std(rs[:,i,0]) for i in range(3)])
# t = np.array([np.average(ts[:,i,0]) for i in range(3)])
# tstd = np.array([np.std(ts[:,i,0]) for i in range(3)])
# ts[(ts<10).all(axis=1).flatten(),:,:]

filtered_ts = ts[(np.abs(ts) < MAX_T).all(axis=1)]  # cut out outliers
t = np.mean(filtered_ts, axis=0)
tstd = np.std(filtered_ts, axis=0)

filtered_rs = rs[(np.abs(ts) < MAX_T).all(axis=1)]  # cut out outliers from ts
r = np.mean(filtered_rs, axis=0)
rstd = np.std(filtered_rs, axis=0)

R, _ = cv2.Rodrigues(r)
# Rt = np.concatenate((R, np.array([t]).T), axis=1)

constants = np.array(
    (COLOR_WIDTH, COLOR_HEIGHT, COLOR_FOCAL_LENGTH, COLOR_PRINCIPAL_X,
     COLOR_PRINCIPAL_Y, THERMAL_WIDTH, THERMAL_HEIGHT, THERMAL_FOCAL_LENGTH,
     THERMAL_PRINCIPAL_X, THERMAL_PRINCIPAL_Y, dist, r, t),
    dtype=object)
# np.save("constants 1 double r", constants)

# def xyztothermal(coords):
#     x,y,z = np.dot(R, coords) + t
#     x = x/z
#     y = y/z
#     r2 = x*x + y*y
#     k = (1 + k1*r2 + k2*r2**2 + k3*r2**3)/(1 + k4*r2 + k5*r2**2 + k6*r2**3)
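# Averaging Rodrigues vectors component-wise (as above) only approximates the
# mean rotation when the samples are tightly clustered. A sketch of a more
# principled average using SciPy's Rotation.mean(); assumes filtered_rs holds
# one rotation vector per row.
from scipy.spatial.transform import Rotation

def mean_rotation_vector(rvecs):
    # convert each axis-angle vector to a Rotation, take the mean rotation,
    # and convert back to a single Rodrigues vector
    return Rotation.from_rotvec(np.asarray(rvecs).reshape(-1, 3)).mean().as_rotvec()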
def pw3d_extract(dataset_path, out_path):

    # scale factor
    scaleFactor = 1.2

    # structs we use
    imgnames_, scales_, centers_, parts_ = [], [], [], []
    poses_, shapes_, genders_ = [], [], []

    # get a list of .pkl files in the directory
    dataset_path = os.path.join(dataset_path, 'sequenceFiles', 'test')
    files = [
        os.path.join(dataset_path, f) for f in os.listdir(dataset_path)
        if f.endswith('.pkl')
    ]
    # go through all the .pkl files
    for filename in files:
        with open(filename, 'rb') as f:
            data = pickle.load(f)
            smpl_pose = data['poses']
            smpl_betas = data['betas']
            poses2d = data['poses2d']
            global_poses = data['cam_poses']
            genders = data['genders']
            valid = np.array(data['campose_valid']).astype(bool)
            num_people = len(smpl_pose)
            num_frames = len(smpl_pose[0])
            seq_name = str(data['sequence'])
            img_names = np.array([
                'imageFiles/' + seq_name + '/image_%s.jpg' % str(i).zfill(5)
                for i in range(num_frames)
            ])
            # go through all the people in the sequence
            for i in range(num_people):
                valid_pose = smpl_pose[i][valid[i]]
                valid_betas = np.tile(smpl_betas[i][:10].reshape(1, -1),
                                      (num_frames, 1))
                valid_betas = valid_betas[valid[i]]
                valid_keypoints_2d = poses2d[i][valid[i]]
                valid_img_names = img_names[valid[i]]
                valid_global_poses = global_poses[valid[i]]
                gender = genders[i]
                # consider only valid frames
                for valid_i in range(valid_pose.shape[0]):
                    part = valid_keypoints_2d[valid_i, :, :].T
                    part = part[part[:, 2] > 0, :]
                    bbox = [
                        min(part[:, 0]),
                        min(part[:, 1]),
                        max(part[:, 0]),
                        max(part[:, 1])
                    ]
                    center = [(bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2]
                    scale = scaleFactor * max(bbox[2] - bbox[0],
                                              bbox[3] - bbox[1]) / 200

                    # transform global pose
                    pose = valid_pose[valid_i]
                    extrinsics = valid_global_poses[valid_i][:3, :3]
                    pose[:3] = cv2.Rodrigues(
                        np.dot(extrinsics, cv2.Rodrigues(pose[:3])[0]))[0].T[0]

                    imgnames_.append(valid_img_names[valid_i])
                    centers_.append(center)
                    scales_.append(scale)
                    poses_.append(pose)
                    shapes_.append(valid_betas[valid_i])
                    genders_.append(gender)

    # store data
    if not os.path.isdir(out_path):
        os.makedirs(out_path)
    out_file = os.path.join(out_path, '3dpw_test.npz')
    np.savez(out_file,
             imgname=imgnames_,
             center=centers_,
             scale=scales_,
             pose=poses_,
             shape=shapes_,
             gender=genders_)
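# The "transform global pose" step above composes the camera rotation with the
# body's global orientation entirely through Rodrigues round trips. The same
# idea in isolation (illustrative helper, not part of the original script;
# assumes cv2 and numpy as np are imported as elsewhere in this file):
def rotate_axis_angle(axis_angle, rotation_matrix):
    """Return the axis-angle vector of rotation_matrix @ R(axis_angle)."""
    R_body, _ = cv2.Rodrigues(np.asarray(axis_angle, dtype=np.float64))
    rvec, _ = cv2.Rodrigues(rotation_matrix.dot(R_body))
    return rvec.reshape(3)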
def getQRPosition(path):
    # create a reader
    scanner = zbar.ImageScanner()

    # configure the reader
    scanner.parse_config('enable')

    # obtain image data
    img = cv2.imread(path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    pil = Image.open(path).convert('L')
    width, height = pil.size
    raw = pil.tobytes()

    # wrap image data
    image = zbar.Image(width, height, 'Y800', raw)

    # scan the image for barcodes
    scanner.scan(image)
    res = False
    # extract results
    for symbol in image:
        # do something useful with results
        print('decoded', symbol.type, 'symbol', '"%s"' % symbol.data)
        QRData = symbol.data
        topLeftCorners, bottomLeftCorners, bottomRightCorners, topRightCorners = [item for item in symbol.location]
        topLeftCorner = [[topLeftCorners[0], topLeftCorners[1]]]
        bottomLeftCorner = [[bottomLeftCorners[0], bottomLeftCorners[1]]]
        topRightCorner = [[topRightCorners[0], topRightCorners[1]]]
        bottomRightCorner = [[bottomRightCorners[0], bottomRightCorners[1]]]
        corners = np.asarray([bottomLeftCorner, bottomRightCorner, topLeftCorner, topRightCorner], dtype=np.float32)
        res = True

    if (res == False):
        print('Not able to scan QR')
        return 0, 0, 0
        exit(1)  # unreachable after the return above

    objp = np.zeros((2 * 2, 3), np.float32)
    objp[:, :2] = np.mgrid[0:2, 0:2].T.reshape(-1, 2)

    # termination criteria
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)

    objpoints = []  # 3d points in real world space
    imgpoints = []  # 2d points in image plane
    objpoints.append(objp)
    imgpoints.append(corners)

    ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None)
    retval, rvec, tvec = cv2.solvePnP(objp, corners, mtx, dist)

    # note: the pose below is built from the calibrateCamera extrinsics
    # (rvecs/tvecs), not from the solvePnP result computed on the previous line
    a = np.zeros((4, 4), np.float32)
    a[0:3, 0:3], _ = cv2.Rodrigues(np.asarray(rvecs))
    a[0:3, 3] = np.transpose(np.asarray(tvecs)[0])[0]
    a[3, 3] = 1
    ai = np.linalg.inv(a)

    return 1, QRData, ai[0:3, 3]

    # axis = np.float32([[1,0,0], [0,1,0], [0,0,-1]]).reshape(-1,3)
    # def draw(img, corners, imgpts):
    #     corner = tuple(corners[0].ravel())
    #     img = cv2.line(img, corner, tuple(imgpts[0].ravel()), (255,0,0), 5)
    #     img = cv2.line(img, corner, tuple(imgpts[1].ravel()), (0,255,0), 5)
    #     img = cv2.line(img, corner, tuple(imgpts[2].ravel()), (0,0,255), 5)
    #     return img
    # imgpts, jac = cv2.projectPoints(axis, rvec, tvec, mtx, dist)
    # img = draw(img,corners,imgpts)
    # cv2.imwrite('res.jpg', img)
    # cv2.imwrite('res.jpg',img)

    # clean up
    del(image)
for i in range(1, total_points_used):
    wX = worldPoints[i, 0] - X_center
    wY = worldPoints[i, 1] - Y_center
    wd = worldPoints[i, 2]
    d1 = np.sqrt(np.square(wX) + np.square(wY))
    wZ = np.sqrt(np.square(wd) - np.square(d1))
    worldPoints[i, 2] = wZ

dist = np.array([0.03, 0.001, 0.0, 0.0, 0.01])
w, h = 640, 360
# camera_matrix_new, roi = cv2.getOptimalNewCameraMatrix(camera_matrix, dist, (w,h), 1, (w,h))
camera_matrix_new = camera_matrix

_, R, Translation = cv2.solvePnP(worldPoints, imagePoints, camera_matrix_new, dist)
R_mtx, jac = cv2.Rodrigues(R)
inverse_R_mtx = np.linalg.inv(R_mtx)
inverse_camera_matrix_new = np.linalg.inv(camera_matrix_new)

s = 0.1  # 1.0  # scaling

print('newcam_mtx: ', np.shape(camera_matrix_new))
print('Translation: ', np.shape(Translation))
print("R - rodrigues: ", np.shape(R_mtx))


def compute_XYZ(u, v):  # from 2D pixels to 3D world
    uv_ = np.array([[u, v, 1]], dtype=np.float32).T
    suv_ = s * uv_
    xyz_ = inverse_camera_matrix_new.dot(suv_) - Translation
    XYZ = inverse_R_mtx.dot(xyz_)
    pred = XYZ.T[0]
def estimate_gaze(base_name, color_img, dist_coefficients, camera_matrix): faceboxes = landmark_estimator.get_face_bb(color_img) # faceboxes = [[0., 0., 224., 224.]] print('%d faces detected.'%len(faceboxes)) if len(faceboxes) == 0: tqdm.write('Could not find faces in the image') return subjects = landmark_estimator.get_subjects_from_faceboxes(color_img, faceboxes) extract_eye_image_patches(subjects) input_r_list = [] # [3, 224, 224] input_l_list = [] # [3, 224, 224] input_head_list = [] # (float,float) valid_subject_list = [] for idx, subject in enumerate(subjects): if subject.left_eye_color is None or subject.right_eye_color is None: tqdm.write('Failed to extract eye image patches') continue success, rotation_vector, _ = cv2.solvePnP(landmark_estimator.model_points, subject.landmarks.reshape(len(subject.landmarks), 1, 2), cameraMatrix=camera_matrix, distCoeffs=dist_coefficients, flags=cv2.SOLVEPNP_DLS) if not success: tqdm.write('Not able to extract head pose for subject {}'.format(idx)) continue _rotation_matrix, _ = cv2.Rodrigues(rotation_vector) _rotation_matrix = np.matmul(_rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]])) _m = np.zeros((4, 4)) _m[:3, :3] = _rotation_matrix _m[3, 3] = 1 # Go from camera space to ROS space _camera_to_ros = [[0.0, 0.0, 1.0, 0.0], [-1.0, 0.0, 0.0, 0.0], [0.0, -1.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0]] roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m))) roll_pitch_yaw = limit_yaw(roll_pitch_yaw) phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw) face_image_resized = cv2.resize(subject.face_color, dsize=(224, 224), interpolation=cv2.INTER_CUBIC) head_pose_image = landmark_estimator.visualize_headpose_result(face_image_resized, (phi_head, theta_head)) if args.vis_headpose: plt.axis("off") plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB)) plt.show() if args.save_headpose: cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_headpose_%d.jpg'%idx), head_pose_image) # cv2.imwrite(os.path.join(args.output_path, 'face_' + os.path.splitext(base_name)[0] + '_%d.jpg'%idx), face_image_resized) input_r_list.append(gaze_estimator.input_from_image(subject.right_eye_color)) input_l_list.append(gaze_estimator.input_from_image(subject.left_eye_color)) input_head_list.append([theta_head, phi_head]) valid_subject_list.append(idx) if len(valid_subject_list) == 0: return gaze_est = gaze_estimator.estimate_gaze_twoeyes(inference_input_left_list=input_l_list, inference_input_right_list=input_r_list, inference_headpose_list=input_head_list) for subject_id, gaze, headpose in zip(valid_subject_list, gaze_est.tolist(), input_head_list): subject = subjects[subject_id] # Build visualizations # print(gaze) # [0.1808396279811859, -0.051273975521326065] r_gaze_img = gaze_estimator.visualize_eye_result(subject.right_eye_color, gaze) l_gaze_img = gaze_estimator.visualize_eye_result(subject.left_eye_color, gaze) s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1) if args.vis_gaze: plt.axis("off") plt.imshow(cv2.cvtColor(s_gaze_img, cv2.COLOR_BGR2RGB)) plt.show() if args.save_gaze: cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_gaze.jpg'), s_gaze_img) # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_left.jpg'), subject.left_eye_color) # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_right.jpg'), subject.right_eye_color) if args.save_estimate: with open(os.path.join(args.output_path, os.path.splitext(base_name)[0] + 
'_output.txt'), 'w+') as f: f.write(os.path.splitext(base_name)[0] + ', [' + str(headpose[1]) + ', ' + str(headpose[0]) + ']' + ', [' + str(gaze[1]) + ', ' + str(gaze[0]) + ']' + '\n')
def train_data(dataset_path, openpose_path, out_path, joints_idx, scaleFactor, extract_img=False, fits_3d=None): joints17_idx = [ 4, 18, 19, 20, 23, 24, 25, 3, 5, 6, 7, 9, 10, 11, 14, 15, 16 ] h, w = 2048, 2048 imgnames_, scales_, centers_ = [], [], [] parts_, Ss_, openposes_ = [], [], [] # training data user_list = range(1, 9) seq_list = range(1, 3) vid_list = list(range(3)) + list(range(4, 9)) counter = 0 for user_i in user_list: for seq_i in seq_list: seq_path = os.path.join(dataset_path, 'S' + str(user_i), 'Seq' + str(seq_i)) # mat file with annotations annot_file = os.path.join(seq_path, 'annot.mat') annot2 = sio.loadmat(annot_file)['annot2'] annot3 = sio.loadmat(annot_file)['annot3'] # calibration file and camera parameters calib_file = os.path.join(seq_path, 'camera.calibration') Ks, Rs, Ts = read_calibration(calib_file, vid_list) for j, vid_i in enumerate(vid_list): # image folder imgs_path = os.path.join(seq_path, 'imageFrames', 'video_' + str(vid_i)) # extract frames from video file if extract_img: # if doesn't exist if not os.path.isdir(imgs_path): os.makedirs(imgs_path) # video file vid_file = os.path.join(seq_path, 'imageSequence', 'video_' + str(vid_i) + '.avi') vidcap = cv2.VideoCapture(vid_file) # process video frame = 0 while 1: # extract all frames success, image = vidcap.read() if not success: break frame += 1 # image name imgname = os.path.join(imgs_path, 'frame_%06d.jpg' % frame) # save image cv2.imwrite(imgname, image) # per frame cam_aa = cv2.Rodrigues(Rs[j])[0].T[0] pattern = os.path.join(imgs_path, '*.jpg') img_list = glob.glob(pattern) for i, img_i in enumerate(img_list): # for each image we store the relevant annotations img_name = img_i.split('/')[-1] img_view = os.path.join('S' + str(user_i), 'Seq' + str(seq_i), 'imageFrames', 'video_' + str(vid_i), img_name) joints = np.reshape(annot2[vid_i][0][i], (28, 2))[joints17_idx] S17 = np.reshape(annot3[vid_i][0][i], (28, 3)) / 1000 S17 = S17[joints17_idx] - S17[4] # 4 is the root bbox = [ min(joints[:, 0]), min(joints[:, 1]), max(joints[:, 0]), max(joints[:, 1]) ] center = [(bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2] scale = scaleFactor * max(bbox[2] - bbox[0], bbox[3] - bbox[1]) / 200 # check that all joints are visible x_in = np.logical_and(joints[:, 0] < w, joints[:, 0] >= 0) y_in = np.logical_and(joints[:, 1] < h, joints[:, 1] >= 0) ok_pts = np.logical_and(x_in, y_in) if np.sum(ok_pts) < len(joints_idx): continue part = np.zeros([24, 3]) part[joints_idx] = np.hstack([joints, np.ones([17, 1])]) # json_file = os.path.join(openpose_path, 'mpi_inf_3dhp', img_view.replace('.jpg', '_keypoints.json')) # openpose = read_openpose(json_file, part, 'mpi_inf_3dhp') S = np.zeros([24, 4]) S[joints_idx] = np.hstack([S17, np.ones([17, 1])]) # because of the dataset size, we only keep every 10th frame counter += 1 if counter % 10 != 1: continue # store the data imgnames_.append(img_view) centers_.append(center) scales_.append(scale) parts_.append(part) Ss_.append(S) # openposes_.append(openpose) # store the data struct if not os.path.isdir(out_path): os.makedirs(out_path) out_file = os.path.join(out_path, 'mpi_inf_3dhp_train.npz') if fits_3d is not None: fits_3d = np.load(fits_3d) np.savez( out_file, imgname=imgnames_, center=centers_, scale=scales_, part=parts_, pose=fits_3d['pose'], shape=fits_3d['shape'], has_smpl=fits_3d['has_smpl'], S=Ss_, # openpose=openposes_ ) else: np.savez( out_file, imgname=imgnames_, center=centers_, scale=scales_, part=parts_, S=Ss_, # openpose=openposes_ )
    if "END OF" in lines[i]:
        break
    i += 1

# deepening of the face
# indsToDeepen = [63, 61, 62, 47, 45, 44, 0,
#                 11, 12, 14, 29, 28, 30]
# mean3[indsToDeepen, 2] -= 0.35

mouthVertices = [[82, 89, 84], [82, 87, 40], [82, 84, 40], [40, 81, 87],
                 [81, 40, 83], [81, 83, 88]]
vertices = np.vstack((vertices, mouthVertices))

r = np.array([0, 0, 3.14])
R = cv2.Rodrigues(r)[0]

for i in range(modes3.shape[0]):
    modes3[i] = np.dot(modes3[i], R.T)
mean3 = np.dot(mean3, R.T)

R = np.zeros((3, 3))
R[0][0] = -1
R[1][1] = 1
R[2][2] = -1

for i in range(modes3.shape[0]):
    modes3[i] = np.dot(modes3[i], R.T)
mean3 = np.dot(mean3, R.T)

mean3 = mean3.T
modes3 = np.transpose(modes3, [0, 2, 1])

# plots.plot3D(mean3)
def substance(self,inimg): lowColor=np.array([53,20,211]) highColor=np.array([86,255,255]) imghls = cv2.cvtColor(inimg, cv2.COLOR_BGR2HLS) mask=cv2.inRange(imghls,lowColor,highColor) contours, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) #now filter the contours biggestIndex=-1; secondIndex=-1; biggestSize=0; secondSize=0; index=0 backtorgb=cv2.cvtColor(mask,cv2.COLOR_GRAY2BGR) if len(contours)>0: for c in contours: if cv2.contourArea(c)>biggestSize: secondSize=biggestSize secondIndex=biggestIndex biggestSize=cv2.contourArea(c) biggestIndex=index elif cv2.contourArea(c)>secondSize: secondSize=cv2.contourArea(c) secondIndex=index index=index+1 if biggestIndex>-1: cv2.drawContours(backtorgb, contours, biggestIndex, (0,255,0), 1) brect1=cv2.minAreaRect(contours[biggestIndex]) brectPoints=cv2.boxPoints(brect1) brectPoints=np.int0(brectPoints) cv2.drawContours(backtorgb,[brectPoints],0,(0,0,255),1) boxText="("+str(brectPoints[0][0])+","+str(brectPoints[0][1])+")"+"("+str(brectPoints[1][0])+","+str(brectPoints[1][1])+")""("+str(brectPoints[2][0])+","+str(brectPoints[2][1])+")""("+str(brectPoints[3][0])+","+str(brectPoints[3][1])+")" #cv2.putText(backtorgb, boxText,(3, 233), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255)) if self.written==False: self.datafile.write(boxText) self.datafile.write("\n") if secondIndex>-1: cv2.drawContours(backtorgb, contours, secondIndex, (0,255,0), 1) brect2=cv2.minAreaRect(contours[secondIndex]) brectPoints2=cv2.boxPoints(brect2) brectPoints2=np.int0(brectPoints2) boxText="("+str(brectPoints2[0][0])+","+str(brectPoints2[0][1])+")"+"("+str(brectPoints2[1][0])+","+str(brectPoints2[1][1])+")""("+str(brectPoints2[2][0])+","+str(brectPoints2[2][1])+")""("+str(brectPoints2[3][0])+","+str(brectPoints2[3][1])+")" cv2.putText(backtorgb, boxText,(3, 233), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255)) cv2.drawContours(backtorgb,[brectPoints2],0,(0,0,255),1) if self.written==False: self.datafile.write(boxText) self.datafile.write("\n") #if you are here, there must be two contours. Find the appropriate #corners v1=641 #there ought to be an easier way, but...... v2=641 v3=641 v4=641 v1i=-1 v2i=-1 v3i=-1 v4i=-1 #This is possibly the dumbest sort I've ever done. But it works and I was tired. #It is really a modification of some earlier code, that made sense. for index in range(0,4): if brectPoints[index][1]<v1: v4i=v3i v3i=v2i v2i=v1i v1i=index v4=v3 v3=v2 v2=v1 v1=brectPoints[index][1] elif brectPoints[index][1]<v2: v4i=v3i v3i=v2i v2i=index v4=v3 v3=v2 v2=brectPoints[index][1] elif brectPoints[index][1]<v3: v4i=v3i v3i=index v4=v3 v3=brectPoints[index][1] else: v4i=index v4=brectPoints[index][1] point1_1=brectPoints[v1i] point2_1=brectPoints[v2i] point3_1=brectPoints[v3i] point4_1=brectPoints[v4i] v1=641 #there ought to be an easier way, but...... 
v2=641 v3=641 v4=641 v1i=-1 v2i=-1 v3i=-1 v4i=-1 for index in range(0,4): if brectPoints2[index][1]<v1: v4i=v3i v3i=v2i v2i=v1i v1i=index v4=v3 v3=v2 v2=v1 v1=brectPoints2[index][1] elif brectPoints2[index][1]<v2: v4i=v3i v3i=v2i v2i=index v4=v3 v3=v2 v2=brectPoints2[index][1] elif brectPoints2[index][1]<v3: v4i=v3i v3i=index v4=v3 v3=brectPoints2[index][1] else: v4i=index v4=brectPoints2[index][1] point1_2=brectPoints2[v1i] point2_2=brectPoints2[v2i] point3_2=brectPoints2[v3i] point4_2=brectPoints2[v4i] #Almost there - set up the arrays of object and image points x1=point1_1[0] x2=point1_2[0] imagepoints=np.array((point1_1,point2_1,point3_1,point4_1,point1_2,point2_2,point3_2,point4_2),dtype=np.float) # logstring(str(imagepoints)) if (x1<x2) : objpoints=np.array(((-5.94,0.5,0),(-4,0,0),(-7.32,-4.82,0),(-5.38,-5.32,0),(5.94,0.5,0),(4,0,0),(7.32,-4.82,0),(5.38,-5.32,0)),dtype=np.float) else: objpoints=np.array(((5.94,0.5,0),(4,0,0),(7.32,-4.82,0),(5.38,-5.32,0),(-5.94,0.5,0),(-4,0,0),(-7.32,-4.82,0),(-5.38,-5.32,0)),dtype=np.float) #imagepoints=np.array(((186,220),(188,212),(187,171),(186,164)),dtype=np.float) cv2.putText(backtorgb, str(imagepoints[4][0])+" "+str(imagepoints[4][1]),(3, 65), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255)) cv2.putText(backtorgb, str(imagepoints[5][0])+" "+str(imagepoints[5][1]),(3, 85), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255)) cv2.putText(backtorgb, str(imagepoints[6][0])+" "+str(imagepoints[6][1]),(3, 105), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255)) cv2.putText(backtorgb, str(imagepoints[7][0])+" "+str(imagepoints[7][1]),(3, 125), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255)) # cv2.putText(backtorgb, str(topcorner1[0])+" "+str(topcorner1[1]),(160,65), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255)) # cv2.putText(backtorgb, str(secondcorner1[0])+" "+str(secondcorner1[1]),(160,85), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255)) # cv2.putText(backtorgb, str(topcorner2[0])+" "+str(topcorner2[1]),(160,105), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255)) # cv2.putText(backtorgb, str(secondcorner2[0])+" "+str(secondcorner2[1]),(160,125), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255)) #The moment of truth? errorestimate,rvec,tvec=cv2.solvePnP(objpoints,imagepoints,self.cameraMatrix,self.distcoeff) reprojectedPoints,jacobian=cv2.projectPoints(objpoints,rvec,tvec,self.cameraMatrix,self.distcoeff) reprojectedPoints=np.reshape(reprojectedPoints,(-1,2)) reprojectionError=0 distsquared=0 for pi in range(0,8): #pi=pointindex distsquared=(reprojectedPoints[pi][0]-imagepoints[pi][0])*(reprojectedPoints[pi][0]-imagepoints[pi][0])+(reprojectedPoints[pi][1]-imagepoints[pi][1])*(reprojectedPoints[pi][1]-imagepoints[pi][1])+distsquared # cv2.projectPoints(objectPoints, rvec, tvec, cameraMatrix, distCoeffs[, imagePoints[, jacobian[, aspectRatio]]]) ? imagePoints, jacobian? #cv2.putText(backtorgb, str(tvec[0])+" "+str(tvec[1])+" "+str(tvec[2]),(3, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255)) cv2.putText(backtorgb, "%.2f" % tvec[0]+" "+"%.2f" % tvec[1]+" "+"%.2f" % tvec[2],(3, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255)) cv2.putText(backtorgb, "%.2f" % (rvec[0]*180/3.14159)+" "+ "%.2f" % (rvec[1]*180/3.14159)+" "+"%.2f" % (rvec[2]*180/3.14159),(3, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255)) #spit things out on the serial port, but first, do some testing. Print stuff out. 
#change coordinate systems ZYX,jac=cv2.Rodrigues(rvec) yaw=0 roll,pitch,yaw=self.rotationMatrixToEulerAngles(ZYX) if (pitch>np.pi/2): pitch=pitch-np.pi yaw=-yaw roll=-roll elif (pitch<-np.pi/2): pitch=pitch+np.pi yaw=-yaw roll=-roll #Now we have a 3x3 rotation matrix, and a translation vector. Form the 4x4 transformation matrix using homogeneous coordinates. #There are probably numpy functions for array/matrix manipulations that would make this easier, but I don?t know them and this works. totaltransformmatrix=np.array([[ZYX[0,0],ZYX[0,1],ZYX[0,2],tvec[0]],[ZYX[1,0],ZYX[1,1],ZYX[1,2],tvec[1]],[ZYX[2,0],ZYX[2,1],ZYX[2,2],tvec[2]],[0,0,0,1]]) #The resulting array is the transformation matrix from world coordinates (centered on the target) to camera coordinates. (Centered on the camera) We need camera to world. That is just the inverse of that matrix. WtoC=np.mat(totaltransformmatrix) # inverserotmax=np.linalg.inv(totaltransformmatrix) # cv2.putText(backtorgb, "%.2f" % inverserotmax[0,3]+" "+"%.2f" % inverserotmax[1,3]+" "+"%.2f" % inverserotmax[2,3],(3, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255)) yawdegrees=yaw*180/np.pi cv2.putText(backtorgb, "%.2f" % yawdegrees,(3, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255)) cv2.putText(backtorgb, "%.2f" % distsquared,(100, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255)) processedFileName="output"+str(self.currentimagecount-1)+".png" # cv2.imwrite(processedFileName,backtorgb) # cv2.imwrite("output7354.png",backtorgb) jevois.sendSerial("targets "+str(tvec[0][0])+" "+str(tvec[2][0])+" "+str(yaw)) return backtorgb else: return inimg
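# The camera-to-world transform mentioned above does not need a general matrix
# inverse: for a rigid transform [R | t] the inverse is [R.T | -R.T @ t].
# Illustrative helper (assumes cv2 and numpy as np; rvec/tvec as returned by
# cv2.solvePnP):
def invert_rigid_transform(rvec, tvec):
    R, _ = cv2.Rodrigues(rvec)
    t = np.asarray(tvec, dtype=np.float64).reshape(3, 1)
    T_inv = np.eye(4)
    T_inv[:3, :3] = R.T
    T_inv[:3, 3:] = -R.T.dot(t)
    return T_inv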
def estimate_gaze(base_name, color_img, dist_coefficients, camera_matrix ,label): global FPS cv2.putText(color_img, "FPS : "+FPS, (10,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2, cv2.LINE_AA) start = time.time() #face box의 위치를 반환.(모든 대상 list로 반환) -[left_x, top_y, right_x, bottom_y] faceboxes = landmark_estimator.get_face_bb(color_img) #draw bounding box for facebox in faceboxes: left = int(facebox[0]) top = int(facebox[1]) right = int(facebox[2]) bottom = int(facebox[3]) cv2.rectangle(color_img,(left,top),(right,bottom),(0,255,0),2) if len(faceboxes) == 0: tqdm.write('Could not find faces in the image') # if EVAL and not SHIFT: # head_phi.append(-100) # head_theta.append(-100) # gaze_phi.append(-100) # gaze_theta.append(-100) return # check_people(faceboxes) subjects = landmark_estimator.get_subjects_from_faceboxes(color_img, faceboxes) extract_eye_image_patches(subjects) input_r_list = [] input_l_list = [] input_head_list = [] valid_subject_list = [] people_count = 1; frame_img =color_img for idx, subject in enumerate(subjects): people_idx = get_people(faceboxes[idx][0]) people_list[people_idx].set_bbox_l(faceboxes[idx][0]) if subject.left_eye_color is None or subject.right_eye_color is None: tqdm.write('Failed to extract eye image patches') continue success, rotation_vector, _ = cv2.solvePnP(landmark_estimator.model_points, subject.landmarks.reshape(len(subject.landmarks), 1, 2), cameraMatrix=camera_matrix, distCoeffs=dist_coefficients, flags=cv2.SOLVEPNP_DLS) if not success: tqdm.write('Not able to extract head pose for subject {}'.format(idx)) continue _rotation_matrix, _ = cv2.Rodrigues(rotation_vector) _rotation_matrix = np.matmul(_rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]])) _m = np.zeros((4, 4)) _m[:3, :3] = _rotation_matrix _m[3, 3] = 1 # Go from camera space to ROS space _camera_to_ros = [[0.0, 0.0, 1.0, 0.0], [-1.0, 0.0, 0.0, 0.0], [0.0, -1.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0]] roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m))) roll_pitch_yaw = limit_yaw(roll_pitch_yaw) phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw) # if EVAL: # head_phi.append(phi_head) # head_theta.append(theta_head) # face_image_resized = cv2.resize(subject.face_color, dsize=(224, 224), interpolation=cv2.INTER_CUBIC) # head_pose_image = landmark_estimator.visualize_headpose_result(face_image_resized, (phi_head, theta_head)) #color_image의 facebox에 headpose vector를 그림. if EVAL: head_pose_image, headpose_err = landmark_estimator.visualize_headpose_result(frame_img,faceboxes[idx], (phi_head, theta_head), people_list[people_idx],label) else: head_pose_image, headpose_err = landmark_estimator.visualize_headpose_result(frame_img,faceboxes[idx], (phi_head, theta_head), people_list[people_idx]) frame_img = head_pose_image if EVAL: headpose_error.append(headpose_err) print("head pose error:",headpose_err) if args.mode =='image': #show headpose # if args.vis_headpose: # plt.axis("off") # plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB)) # plt.show() if args.save_headpose: cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0]+str(people_count) + '_headpose.jpg'), head_pose_image) people_count +=1 #size 등 format 변경. 
input_r_list.append(gaze_estimator.input_from_image(subject.right_eye_color)) input_l_list.append(gaze_estimator.input_from_image(subject.left_eye_color)) input_head_list.append([theta_head, phi_head]) valid_subject_list.append(idx) # if args.mode =='video': # # plt.axis("off") # # plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB)) # # plt.show() # headpose_out_video.write(frame_img) if len(valid_subject_list) == 0: return # returns [subject : [gaze_pose]] gaze_est = gaze_estimator.estimate_gaze_twoeyes(inference_input_left_list=input_l_list, inference_input_right_list=input_r_list, inference_headpose_list=input_head_list) people_count = 1 for subject_id, gaze, headpose in zip(valid_subject_list, gaze_est.tolist(), input_head_list): subject = subjects[subject_id] facebox = faceboxes[subject_id] people_idx = get_people(facebox[0]) # Build visualizations # r_gaze_img = gaze_estimator.visualize_eye_result(subject.right_eye_color, gaze) # l_gaze_img = gaze_estimator.visualize_eye_result(subject.left_eye_color, gaze) # if EVAL: # gaze_theta.append(gaze[0]) # gaze_phi.append(gaze[1]) if EVAL: r_gaze_img, r_gaze_err = gaze_estimator.visualize_eye_result(frame_img, gaze, subject.leftcenter_coor, facebox,people_list[people_idx], "gaze_r", label) l_gaze_img, l_gaze_err = gaze_estimator.visualize_eye_result(r_gaze_img, gaze, subject.rightcenter_coor, facebox,people_list[people_idx], "gaze_l", label) else: r_gaze_img, r_gaze_err = gaze_estimator.visualize_eye_result(frame_img, gaze, subject.leftcenter_coor, facebox,people_list[people_idx], "gaze_r") l_gaze_img, l_gaze_err = gaze_estimator.visualize_eye_result(r_gaze_img, gaze, subject.rightcenter_coor, facebox,people_list[people_idx], "gaze_l") frame_img = l_gaze_img if EVAL: print("right gaze error:",r_gaze_err) print("left gaze error:",l_gaze_err) gaze_error.append(r_gaze_err) gaze_error.append(l_gaze_err) #show gaze image # if args.vis_gaze: # plt.axis("off") # plt.imshow(cv2.cvtColor(s_gaze_img, cv2.COLOR_BGR2RGB)) # plt.show() if args.mode =='image': if args.save_gaze: cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0]+str(people_count) + '_gaze.jpg'), frame_img) # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_left.jpg'), subject.left_eye_color) # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_right.jpg'), subject.right_eye_color) if args.save_estimate: with open(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_output.txt'), 'w+') as f: f.write(os.path.splitext(base_name)[0] + ', [' + str(headpose[1]) + ', ' + str(headpose[0]) + ']' + ', [' + str(gaze[1]) + ', ' + str(gaze[0]) + ']' + '\n') people_count +=1 if args.mode =='video': out_video.write(frame_img) end = time.time() delay_time = end-start FPS = str(int(1/delay_time))
draw_3D_axis(img, (object_points_2.vertexes[3].x, object_points_2.vertexes[3].y),
             B_axis_imgpts)
cv2.imshow("img", img)
cv2.waitKey(0)
cv2.destroyAllWindows()

# get rotation angle (magnitude of the rotation vector)
A_angle_rad = np.sqrt((A_rvecs[0] * A_rvecs[0]) + (A_rvecs[1] * A_rvecs[1]) +
                      (A_rvecs[2] * A_rvecs[2]))
B_angle_rad = np.sqrt((B_rvecs[0] * B_rvecs[0]) + (B_rvecs[1] * B_rvecs[1]) +
                      (B_rvecs[2] * B_rvecs[2]))

# use Rodrigues to transform the pose rotation vector into a DCM
AdcmO, _ = cv2.Rodrigues(src=A_rvecs)
BdcmO, _ = cv2.Rodrigues(src=B_rvecs)

# cast to matrix to perform dot product
AdcmO = Matrix(AdcmO)
BdcmO = Matrix(BdcmO)
AdcmB1 = BdcmO * AdcmO.transpose()
BdcmA1 = AdcmO * BdcmO.transpose()

# get error matrix
EdcmAB = AdcmB1 * trueAdcmB.transpose()
EdcmBA = BdcmA1 * trueBdcmA.transpose()

print("AdcmB1:", AdcmB1)
print("BdcmA1:", BdcmA1)
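# One compact way to summarize an error matrix such as EdcmAB is the rotation
# angle it represents: converting the DCM back to a Rodrigues vector gives its
# axis-angle form, whose norm is the angular error. Illustrative sketch
# (assumes cv2 and numpy as np; the Matrix wrapper used above is converted back
# to a plain float array first):
def rotation_error_deg(error_dcm):
    err_rvec, _ = cv2.Rodrigues(np.asarray(error_dcm, dtype=np.float64))
    return np.degrees(np.linalg.norm(err_rvec))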
corners, markerLength, mtx, dist) for i in range(0, ids.size): aruco.drawAxis(frame, mtx, dist, rvec[i], tvec[i], 5) # show translation vector on the corner font = cv2.FONT_HERSHEY_SIMPLEX text = str([round(i, 5) for i in tvec[i][0]]) position = tuple(corners[i][0][0]) savetvec.append( [ids[i][0], tvec[i][0][0], tvec[i][0][1], tvec[i][0][2]]) savervec.append( [ids[i][0], rvec[i][0][0], rvec[i][0][1], rvec[i][0][2]]) rot_mat, _ = cv2.Rodrigues(rvec[i][:][:]) c2 = (rot_mat[0][0]**2 + rot_mat[0][1]**2)**(1 / 2) thetaR = math.atan2(-rot_mat[0][2], c2) savethetaR.append([ids[i][0], thetaR]) #print(dist) Z = savetvec[-1][3] X = savetvec[-1][1] thetaR_deg = thetaR * 180 / math.pi #Marker data index = savetvec[-1][0] Xm = marker_pose(index, 0) Ym = marker_pose(index, 1)
corners, ids, rejected_corners = cv2.aruco.detectMarkers(frame, aruco_dict) frame = cv2.aruco.drawDetectedMarkers(frame, corners, ids) # Parse the id list for valid commands, keeping track of ignored commands and duplicates if (len(corners) > 0): move_dir_cmd, match_orient_cmd, match_pos_cmd, change_alt_cmd, ignored_cmds, num_duplicates = parseImageCmds(ids) # Execute each command, if valid # Move the way that the marker is pointing if move_dir_cmd is not None: move_dir_corners = corners[np.nonzero(ids == move_dir_cmd)[0][0]] # Get the corners corresponding to this ID, and draw pose rvecs, tvecs, objPoints = cv2.aruco.estimatePoseSingleMarkers(move_dir_corners, MARKER_SIDE_FEET, camera_matrix, dist_coeffs) rvec = rvecs[0][0] tvec = tvecs[0][0] frame = cv2.aruco.drawAxis(frame, camera_matrix, dist_coeffs, rvec, tvec, 1) rotation_matrix, jacobian = cv2.Rodrigues(rvec) euler = rotationMatrixToEulerAngles(rotation_matrix) x_heading = math.sin(euler[2]) # sin/cos flipped around because of the way markers are. y_heading = math.cos(euler[2]) # Rotate until we're match the marker's rotation if match_orient_cmd is not None: match_orient_corners = corners[np.nonzero(ids == match_orient_cmd)[0][0]] rvecs, tvecs, objPoints = cv2.aruco.estimatePoseSingleMarkers(match_orient_corners, MARKER_SIDE_FEET, camera_matrix, dist_coeffs) rvec = rvecs[0][0] tvec = tvecs[0][0] frame = cv2.aruco.drawAxis(frame, camera_matrix, dist_coeffs, rvec, tvec, 1) rotation_matrix, jacobian = cv2.Rodrigues(rvec) euler = rotationMatrixToEulerAngles(rotation_matrix) rotation = -euler[2]
def normalizeData(img, face, hr, ht, cam, gc=None):
    # Rodrigues -> converts a rotation matrix to a rotation vector or vice versa.

    ## normalized camera parameters
    focal_norm = 1600  # focal length of normalized camera
    distance_norm = 1000  # normalized distance between eye and camera
    roiSize = (448, 448)  # size of cropped eye image

    img_u = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    ## compute estimated 3D positions of the landmarks
    # ht = ht.reshape((3,1))
    # ht = np.repeat(ht, 6, axis=1)
    if gc is not None:
        gc = gc.reshape((3, 1))
    hR = cv2.Rodrigues(hr)[0]  # converts rotation vector to rotation matrix
    Fc = np.dot(hR, face) + ht  # 3D positions of facial landmarks
    face_center = np.sum(Fc, axis=1, dtype=np.float32) / 6.0

    ## ---------- normalize image ----------
    distance = np.linalg.norm(
        face_center)  # actual distance between face center and original camera

    z_scale = distance_norm / distance
    cam_norm = np.array([
        [focal_norm, 0, roiSize[0] / 2],
        [0, focal_norm, roiSize[1] / 2],
        [0, 0, 1.0],
    ])
    S = np.array([  # scaling matrix
        [1.0, 0.0, 0.0],
        [0.0, 1.0, 0.0],
        [0.0, 0.0, z_scale],
    ])

    hRx = hR[:, 0]
    forward = (face_center / distance).reshape(3)
    down = np.cross(forward, hRx)
    down /= np.linalg.norm(down)
    right = np.cross(down, forward)
    right /= np.linalg.norm(right)
    R = np.c_[right, down, forward].T  # rotation matrix R

    W = np.dot(np.dot(cam_norm, S),
               np.dot(R, np.linalg.inv(cam)))  # transformation matrix

    img_warped = cv2.warpPerspective(img_u, W, roiSize)  # image normalization
    img_warped = cv2.equalizeHist(img_warped)

    ## ---------- normalize rotation ----------
    hR_norm = np.dot(R, hR)  # rotation matrix in normalized space
    hr_norm = cv2.Rodrigues(hR_norm)[
        0]  # convert rotation matrix to rotation vector

    ## ---------- normalize gaze vector ----------
    if gc is not None:
        gc_normalized = gc - et  # gaze vector (et: 3D gaze origin, defined elsewhere)
        # For modified data normalization, scaling is not applied to the gaze
        # direction, so only R is applied here.
        # For the original data normalization, this would instead be:
        #   M = np.dot(S, R)
        #   gc_normalized = np.dot(M, gc_normalized)
        gc_normalized = np.dot(R, gc_normalized)
        gc_normalized = gc_normalized / np.linalg.norm(gc_normalized)

    if gc is not None:
        data = [img_warped, hr_norm, gc_normalized]
    else:
        data = [img_warped, hr_norm]

    return data
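# Minimal usage sketch for normalizeData with synthetic inputs, to make the
# expected shapes explicit (illustration only, not part of the original
# pipeline; assumes cv2 and numpy as np): face is 3 x 6 (one column per 3D
# facial landmark, matching the division by 6 above), hr is a 3x1 Rodrigues
# vector and ht a 3x1 translation, as returned by cv2.solvePnP.
dummy_img = np.zeros((720, 1280, 3), np.uint8)
dummy_cam = np.array([[960.0, 0.0, 640.0],
                      [0.0, 960.0, 360.0],
                      [0.0, 0.0, 1.0]])
dummy_face = np.random.rand(3, 6) * 50.0      # 3D facial landmarks
dummy_hr = np.array([[0.1], [0.0], [0.0]])    # head rotation (Rodrigues vector)
dummy_ht = np.array([[0.0], [0.0], [800.0]])  # head translation
img_warped_demo, hr_norm_demo = normalizeData(dummy_img, dummy_face,
                                              dummy_hr, dummy_ht, dummy_cam)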
def get_landmark_positions( stored_parameter_fp, # pylint: disable=too-many-locals, too-many-arguments resolution, resolution_orig, # pylint: disable=unused-argument landmarks, trans=(0, 0), # pylint: disable=unused-argument scale=1., steps_x=3, steps_y=12): """Get landmark positions for a given image.""" with open(stored_parameter_fp, 'rb') as inf: stored_parameters = pickle.load(inf) orig_pose = np.array(stored_parameters['pose']).copy() orig_rt = np.array(stored_parameters['rt']).copy() orig_trans = np.array(stored_parameters['trans']).copy() orig_t = np.array(stored_parameters['t']).copy() model = MODEL_NEUTRAL model.betas[:len(stored_parameters['betas'])] = stored_parameters['betas'] mesh = _TEMPLATE_MESH # Use always the image center for rendering. orig_t[0] = 0. orig_t[1] = 0. orig_t[2] /= scale # Prepare for rendering. angles_y = np.linspace(0., 2. * (1. - 1. / steps_y) * np.pi, steps_y) elevation_maxextent = (steps_x - 1) // 2 * 0.2 * np.pi angles_x = np.linspace(-elevation_maxextent, elevation_maxextent, steps_x) if steps_x == 1: # Assume plain layout. angles_x = (0., ) angles = itertools.product(angles_y, angles_x) landmark_positions = [] full_parameters = [] for angle_y, angle_x in angles: stored_parameters['rt'] = orig_rt.copy() stored_parameters['rt'][0] += angle_x stored_parameters['rt'][1] += angle_y ####################################################################### # Zero out camera translation and rotation and move this information # to the body root joint rotations and 'trans' parameter. #print orig_pose[:3] cam_rdg, _ = cv2.Rodrigues(np.array(stored_parameters['rt'])) per_rdg, _ = cv2.Rodrigues(np.array(orig_pose)[:3]) resrot, _ = cv2.Rodrigues(np.dot(per_rdg, cam_rdg.T)) restrans = np.dot(-np.array(orig_trans), cam_rdg.T) + np.array(orig_t) stored_parameters['pose'][:3] = (-resrot).flat stored_parameters['trans'][:] = restrans stored_parameters['rt'][:] = [0, 0, 0] stored_parameters['t'][:] = [0, 0, 0] ####################################################################### # Get the full rendered mesh. model.pose[:] = stored_parameters['pose'] model.trans[:] = stored_parameters['trans'] mesh.v = model.r mesh_points = mesh.v[tuple(landmarks.values()), ] # Get the skeleton joints. J_onbetas = model.J_regressor.dot(mesh.v) skeleton_points = J_onbetas[(8, 5, 2, 1, 4, 7, 21, 19, 17, 16, 18, 20), ] # Do the projection. camera = _odr_c.ProjectPoints(rt=stored_parameters['rt'], t=stored_parameters['t'], f=(stored_parameters['f'], stored_parameters['f']), c=np.array(resolution) / 2., k=np.zeros(5)) camera.v = np.vstack((skeleton_points, mesh_points)) full_parameters.append( list(stored_parameters['betas']) + list(stored_parameters['pose']) + list(stored_parameters['trans']) + list(stored_parameters['rt']) + list(stored_parameters['t']) + [stored_parameters['f']]) landmark_positions.append(list(camera.r.T.copy().flat)) return landmark_positions, full_parameters
pil_im = Image.fromarray(cv2.cvtColor(cv_orig_im, cv2.COLOR_BGR2RGB))
trans_pil_im, R_trans = transformImage(pil_im, cv_orig_im, landmarks)
in_im = transform(trans_pil_im)

op_cls, op_z, op_vp, _ = vpnet(in_im[None, :, :].cuda())
vp = vpnet.module.compute_vp_pred(op_vp)

pred_a = np.rad2deg(vp['a'].cpu().detach().numpy())
pred_e = np.rad2deg(vp['e'].cpu().detach().numpy())
pred_t = np.rad2deg(vp['t'].cpu().detach().numpy())

linreg_pred_a = linreg_a.predict(pred_a.reshape(pred_a.shape[0], -1))
linreg_pred_e = linreg_e.predict(pred_e.reshape(pred_e.shape[0], -1))
linreg_pred_t = linreg_t.predict(pred_t.reshape(pred_t.shape[0], -1))

# note: the rotation vector below reuses linreg_pred_a as its third component
# rather than linreg_pred_t
pred_R = cv2.Rodrigues(
    np.asarray([linreg_pred_a, linreg_pred_e, linreg_pred_a]))[0]
pred_R = pred_R.astype(np.float64)

cv_im = cv_orig_im.copy()
imgn, proj_points = drawPose(cv_orig_im.copy(), R_rgb, T_rgb,
                             calibration['intrinsics'], calibration['dist'])
tdx_p, tdy_p, xp, yp, im_axes = draw_axis(cv_im,
                                          linreg_pred_a,
                                          linreg_pred_e,
                                          linreg_pred_t,
                                          tdx=proj_points[0, 0, 0],
                                          tdy=proj_points[0, 0, 1])
dx_p = xp - tdx_p
dy_p = yp - tdy_p
def singleAruRelPos(queryImg, corners, Id, markerSize_mm, camera_matrix, camera_dist_coefs, superimpAru='none', tglDrawMark=0, tglDrawCenter=0): # positiion estimation rvecs, tvecs = aruco.estimatePoseSingleMarkers(corners, markerSize_mm, camera_matrix, camera_dist_coefs) (rvecs - tvecs).any() # get rid of that nasty numpy value array error # distance [mm] distnc_mm = np.sqrt((tvecs**2).sum()) # rotation and projection matrix rotation_matrix = cv.Rodrigues(rvecs)[0] P = np.hstack((rotation_matrix, np.reshape(tvecs, [3, 1]))) # euler_angles_degrees = - cv.decomposeProjectionMatrix(P)[6] # euler_angles_radians = euler_angles_degrees * np.pi / 180 # substitute marker with distance of Id if superimpAru == 'distance': queryImg = distOverAruco(round(distnc_mm, 1), corners, queryImg) elif superimpAru == 'id': queryImg = IdOverAruco(Id, corners, queryImg) # draws axis half of the size of the marker if tglDrawMark: markerDim_px = np.sqrt((corners[0][0][0] - corners[0][3][0])**2 + (corners[0][0][1] - corners[0][3][1])**2) aruco.drawAxis(queryImg, camera_matrix, camera_dist_coefs, rvecs, tvecs, int(markerDim_px // 4)) if tglDrawCenter: centerx, centery = np.abs(corners[0][0] + corners[0][2]) / 2 markerDim_px = np.sqrt((corners[0][0][0] - corners[0][3][0])**2 + (corners[0][0][1] - corners[0][3][1])**2) queryImg = cv.circle(queryImg, (int(centerx), int(centery)), int(markerDim_px / 16), (255, 255, 0), -1) return queryImg, distnc_mm, P ################################## # bibliography # https://docs.opencv.org/4.2.0/d5/dae/tutorial_aruco_detection.html # for the parameters list ############################ # old sharp code # marker_dim_px = np.sqrt((corners[0][0][0] - corners[0][3][0])**2 + (corners[0][0][1] - corners[0][3][1])**2) # # distance_mm = marker_real_world_mm * (np.max(imgShape) / sensorWidth) * focal_lenght / marker_dim_px # markerSquare_cm = np.float32([[0, 0, 0], [0, mrkSiz_cm, 0], [mrkSiz_cm, mrkSiz_cm, 0], [mrkSiz_cm, 0, 0]]) # _, rvecs, tvecs = cv.solvePnP(markerSquare_cm, corners, camera_matrix, camera_dist_coefs) # # axis = np.float32([[3,0,0], [0,3,0], [0,0,3]]).reshape(-1,3)# Array for drawing the 3 cartesian axes # imgpts, jac = cv.projectPoints(axis, rvecs, tvecs, camera_matrix, camera_dist_coefs) # # centerx,centery=np.abs(corners[0][0] + corners[0][2])/2
def rot(axis, angle, matrix_len=4):
    R_vec = np.array(axis).astype(float) * angle
    R, _ = cv2.Rodrigues(R_vec)
    if matrix_len == 4:
        R = rot3x3_to_4x4(R)
    return R
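# rot3x3_to_4x4 is referenced above but not shown; a plausible minimal version,
# plus a usage example of rot() (both illustrative; assume cv2 and numpy as np):
def rot3x3_to_4x4(R3):
    R4 = np.eye(4)
    R4[:3, :3] = R3
    return R4

# 90-degree rotation about the z axis as a 4x4 homogeneous matrix
Rz = rot([0, 0, 1], np.pi / 2)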
else: pass while(True): seqName = args['seq'] id = args['id'] # read image, depths maps and annotations img = read_RGB_img(baseDir, seqName, id, split) depth = read_depth_img(baseDir, seqName, id, split) anno = read_annotation(baseDir, seqName, id, split) # get object 3D corner locations for the current pose objCorners = anno['objCorners3DRest'] objCornersTrans = np.matmul(objCorners, cv2.Rodrigues(anno['objRot'])[0].T) + anno['objTrans'] # get the hand Mesh from MANO model for the current pose if split == 'train': handJoints3D, handMesh = forwardKinematics(anno['handPose'], anno['handTrans'], anno['handBeta']) # project to 2D if split == 'train': handKps = project_3D_points(anno['camMat'], handJoints3D, is_OpenGL_coords=True) else: # Only root joint available in evaluation split handKps = project_3D_points(anno['camMat'], np.expand_dims(anno['handJoints3D'],0), is_OpenGL_coords=True) objKps = project_3D_points(anno['camMat'], objCornersTrans, is_OpenGL_coords=True) # Visualize if args['visType'] == 'open3d':
# -- Unpack the output, get only the first detection
rvec, tvec = ret[0][0, 0, :], ret[1][0, 0, :]

# -- Draw the detected marker and put a reference frame over it
aruco.drawDetectedMarkers(frame, corners)
aruco.drawAxis(frame, camera_matrix, camera_distortion, rvec, tvec, 10)

# -- Print the tag position in the camera frame
str_position = "MARKER Position x=%4.0f y=%4.0f z=%4.0f" % (
    tvec[0], tvec[1], tvec[2])
cv2.putText(frame, str_position, (0, 100), font, 1, (0, 255, 0), 2,
            cv2.LINE_AA)

# -- Obtain the rotation matrix tag->camera
R_ct = np.matrix(cv2.Rodrigues(rvec)[0])
R_tc = R_ct.T

# -- Get the attitude in terms of euler 321 (needs to be flipped first)
roll_marker, pitch_marker, yaw_marker = rotationMatrixToEulerAngles(
    R_flip * R_tc)

# -- Print the marker's attitude with respect to the camera frame
str_attitude = "MARKER Attitude r=%4.0f p=%4.0f y=%4.0f" % (
    math.degrees(roll_marker), math.degrees(pitch_marker),
    math.degrees(yaw_marker))
cv2.putText(frame, str_attitude, (0, 150), font, 1, (0, 255, 0), 2,
            cv2.LINE_AA)

# -- Now get the position and attitude of the camera with respect to the marker
pos_camera = -R_tc * np.matrix(tvec).T
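# rotationMatrixToEulerAngles is used above but defined elsewhere; a common
# XYZ (roll, pitch, yaw) decomposition looks roughly like this (illustrative
# sketch; assumes math and numpy as np are imported as in the snippet above):
def rotationMatrixToEulerAngles(R):
    sy = math.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0])
    if sy > 1e-6:
        x = math.atan2(R[2, 1], R[2, 2])
        y = math.atan2(-R[2, 0], sy)
        z = math.atan2(R[1, 0], R[0, 0])
    else:  # close to gimbal lock
        x = math.atan2(-R[1, 2], R[1, 1])
        y = math.atan2(-R[2, 0], sy)
        z = 0.0
    return np.array([x, y, z])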
cv2.imshow('brightness2', frameWithBrightnessPoint)

# If found, add object points, image points (after refining them)
if ret1 and ret2:
    corners2_1 = cv2.cornerSubPix(gray1, corners1, (11, 11), (-1, -1),
                                  criteria)
    ret1, rvecs1, tvecs1, inliers1 = cv2.solvePnPRansac(
        objp, corners2_1, mtx1, dist1)

    corners2_2 = cv2.cornerSubPix(gray2, corners2, (11, 11), (-1, -1),
                                  criteria)
    ret2, rvecs2, tvecs2, inliers2 = cv2.solvePnPRansac(
        objp, corners2_2, mtx2, dist2)

    if ret1 and ret2:
        rotMatrix1, _ = cv2.Rodrigues(rvecs1)
        rotMatrix2, _ = cv2.Rodrigues(rvecs2)

        imgpts1, _ = cv2.projectPoints(axis, rvecs1, tvecs1, mtx1, dist1)
        imgpts2, _ = cv2.projectPoints(axis, rvecs2, tvecs2, mtx2, dist2)

        img1 = draw(dst1, corners2_1, imgpts1)
        img2 = draw(dst2, corners2_2, imgpts2)
        cv2.imshow('img1', img1)
        cv2.imshow('img2', img2)

        msg1 = np.array([rotMatrix1, np.transpose(tvecs1)]).tobytes('C')
        msg2 = np.array([rotMatrix2, np.transpose(tvecs2)]).tobytes('C')
        sock.sendto(
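# Because both poses above are estimated against the same chessboard, the
# relative transform between the two cameras follows directly from the two
# extrinsics. Illustrative helper (assumes numpy as np; R1/t1 and R2/t2 are the
# world-to-camera rotations and translations, e.g. rotMatrix1/tvecs1 and
# rotMatrix2/tvecs2 above):
def relative_camera_transform(R1, t1, R2, t2):
    # maps points from camera-1 coordinates to camera-2 coordinates
    R_12 = R2.dot(R1.T)
    t_12 = t2 - R_12.dot(t1)
    return R_12, t_12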
def __getitem__(self, index): img_path = os.path.join(self.data_root, self.ims[index]) img = imageio.imread(img_path).astype(np.float32) / 255. img = cv2.resize(img, (1024, 1024)) msk = self.get_mask(index) cam_ind = self.cam_inds[index] K = np.array(self.cams['K'][cam_ind]) D = np.array(self.cams['D'][cam_ind]) img = cv2.undistort(img, K, D) msk = cv2.undistort(msk, K, D) R = np.array(self.cams['R'][cam_ind]) T = np.array(self.cams['T'][cam_ind]) / 1000. # reduce the image resolution by ratio H, W = int(img.shape[0] * cfg.ratio), int(img.shape[1] * cfg.ratio) img = cv2.resize(img, (W, H), interpolation=cv2.INTER_AREA) msk = cv2.resize(msk, (W, H), interpolation=cv2.INTER_NEAREST) if cfg.mask_bkgd: img[msk == 0] = 0 K[:2] = K[:2] * cfg.ratio if self.human in ['CoreView_313', 'CoreView_315']: i = int(os.path.basename(img_path).split('_')[4]) else: i = int(os.path.basename(img_path)[:-4]) feature, coord, out_sh, can_bounds, bounds, Rh, Th, center, rot, trans = self.prepare_input( i) if cfg.sample_smpl: depth_path = os.path.join(self.data_root, 'depth', self.ims[index])[:-4] + '.npy' depth = np.load(depth_path) rgb, ray_o, ray_d, near, far, coord_, mask_at_box = if_nerf_dutils.sample_smpl_ray( img, msk, depth, K, R, T, self.nrays, self.split) elif cfg.sample_grid: # print('sample_grid') rgb, ray_o, ray_d, near, far, coord_, mask_at_box = if_nerf_dutils.sample_ray_grid( img, msk, K, R, T, can_bounds, self.nrays, self.split) else: rgb, ray_o, ray_d, near, far, coord_, mask_at_box = if_nerf_dutils.sample_ray_h36m( img, msk, K, R, T, can_bounds, self.nrays, self.split) acc = if_nerf_dutils.get_acc(coord_, msk) ret = { 'feature': feature, 'coord': coord, 'out_sh': out_sh, 'rgb': rgb, 'ray_o': ray_o, 'ray_d': ray_d, 'near': near, 'far': far, 'acc': acc, 'mask_at_box': mask_at_box } R = cv2.Rodrigues(Rh)[0].astype(np.float32) i = index // self.num_cams meta = { 'bounds': bounds, 'R': R, 'Th': Th, 'center': center, 'rot': rot, 'trans': trans, 'i': i, 'cam_ind': cam_ind } ret.update(meta) return ret
glEnd() if __name__ == '__main__': rotation3d = np.array([1, 2, 3], dtype=np.float32) translation3d = np.array([50, 60, 70], dtype=np.float32) # transformation from Camera Optical Center: # first: translate from Camera center to object origin. # second: rotate x,y,z # coordinate system is x,y,z positive (not like opengl, where the z-axis is flipped.) # print rotation3d[0],rotation3d[1],rotation3d[2], translation3d[0],translation3d[1],translation3d[2] #turn translation vectors into 3x3 rot mat. rotation3dMat, _ = cv2.Rodrigues(rotation3d) #to get the transformation from object to camera we need to reverse rotation and translation # tranform3d_to_camera_translation = np.eye(4, dtype=np.float32) tranform3d_to_camera_translation[:-1, -1] = -translation3d #rotation matrix inverse == transpose tranform3d_to_camera_rotation = np.eye(4, dtype=np.float32) tranform3d_to_camera_rotation[:-1, :-1] = rotation3dMat.T print(tranform3d_to_camera_translation) print(tranform3d_to_camera_rotation) print( np.matrix(tranform3d_to_camera_rotation) * np.matrix(tranform3d_to_camera_translation))
def ransac(stereo_pair, matched_fps, stereo_cache=None, get_pose=False, refine=False): assert type(stereo_pair) == sequences.PairWithStereo if stereo_cache is not None: raise NotImplementedError P_C0, full_has_depth = stereo_cache[stereo_pair.imname(0)] else: P_C0, has_depth = pointsFromStereo( [stereo_pair.im[0], stereo_pair.rim_0], matched_fps[0].ips_rc, stereo_pair.K, stereo_pair.baseline) valid_ips1 = matched_fps[1].ips_rc[:, has_depth] if np.count_nonzero(has_depth) < 5: print('Warning: Not enough depths!') if not get_pose: return np.zeros_like(has_depth).astype(bool) else: return np.zeros_like(has_depth).astype(bool), None assert P_C0 is not None assert valid_ips1 is not None try: if np.count_nonzero(has_depth) == 4: # For some reason, this does not want to work... _, rvec, tvec = cv2.solvePnP(objectPoints=np.float32(P_C0.T), imagePoints=np.fliplr( np.float32(valid_ips1.T)), cameraMatrix=stereo_pair.K, distCoeffs=None, flags=cv2.SOLVEPNP_P3P) inliers = np.ones(4, dtype=bool) else: _, rvec, tvec, inliers = cv2.solvePnPRansac( np.float32(P_C0.T), np.fliplr(np.float32(valid_ips1.T)), stereo_pair.K, distCoeffs=None, reprojectionError=3, flags=cv2.SOLVEPNP_P3P, iterationsCount=3000) except cv2.error as e: print(P_C0) print(valid_ips1) raise e if inliers is None: print('Warning: Ransac returned None inliers!') if not get_pose: return np.zeros_like(has_depth).astype(bool) else: return np.zeros_like(has_depth).astype(bool), None inliers = np.ravel(inliers) if refine: _, rvec, tvec = cv2.solvePnP(objectPoints=np.float32(P_C0.T)[inliers], imagePoints=np.fliplr( np.float32(valid_ips1.T)[inliers]), cameraMatrix=stereo_pair.K, distCoeffs=None, rvec=rvec, tvec=tvec, useExtrinsicGuess=True, flags=cv2.SOLVEPNP_ITERATIVE) inl_indices = np.nonzero(has_depth)[0][inliers] inlier_mask = np.bincount(inl_indices, minlength=has_depth.size).astype(bool) if not get_pose: return inlier_mask else: T_1_0 = rpg_common_py.pose.Pose(cv2.Rodrigues(rvec)[0], tvec) return inlier_mask, T_1_0
def _get_location(self, visible_markers, camera_calibration, min_marker_perimeter, min_id_confidence, locate_3d=False): filtered_markers = [ m for m in visible_markers if m['perimeter'] >= min_marker_perimeter and m['id_confidence'] > min_id_confidence ] marker_by_id = {} #if an id shows twice we use the bigger marker (usually this is a screen camera echo artifact.) for m in filtered_markers: if m["id"] in marker_by_id and m["perimeter"] < marker_by_id[ m['id']]['perimeter']: pass else: marker_by_id[m["id"]] = m visible_ids = set(marker_by_id.keys()) requested_ids = set(self.markers.keys()) overlap = visible_ids & requested_ids # need at least two markers per surface when the surface is more that 1 marker. if overlap and len(overlap) >= min(2, len(requested_ids)): detected = True xy = np.array([marker_by_id[i]['verts'] for i in overlap]) uv = np.array([self.markers[i].uv_coords for i in overlap]) uv.shape = (-1, 1, 2) # our camera lens creates distortions we want to get a good 2d estimate despite that so we: # compute the homography transform from marker into the undistored normalized image space # (the line below is the same as what you find in methods.undistort_unproject_pts, except that we ommit the z corrd as it is always one.) xy_undistorted_normalized = cv2.undistortPoints( xy.reshape(-1, 1, 2), camera_calibration['camera_matrix'], camera_calibration['dist_coefs'] * self.use_distortion) m_to_undistored_norm_space, mask = cv2.findHomography( uv, xy_undistorted_normalized, method=cv2.RANSAC, ransacReprojThreshold=0.1) if not mask.all(): detected = False m_from_undistored_norm_space, mask = cv2.findHomography( xy_undistorted_normalized, uv) # project the corners of the surface to undistored space corners_undistored_space = cv2.perspectiveTransform( marker_corners_norm.reshape(-1, 1, 2), m_to_undistored_norm_space) # project and distort these points and normalize them corners_redistorted, corners_redistorted_jacobian = cv2.projectPoints( cv2.convertPointsToHomogeneous(corners_undistored_space), np.array([0, 0, 0], dtype=np.float32), np.array([0, 0, 0], dtype=np.float32), camera_calibration['camera_matrix'], camera_calibration['dist_coefs'] * self.use_distortion) corners_nulldistorted, corners_nulldistorted_jacobian = cv2.projectPoints( cv2.convertPointsToHomogeneous(corners_undistored_space), np.array([0, 0, 0], dtype=np.float32), np.array([0, 0, 0], dtype=np.float32), camera_calibration['camera_matrix'], camera_calibration['dist_coefs'] * 0) #normalize to pupil norm space corners_redistorted.shape = -1, 2 corners_redistorted /= camera_calibration['resolution'] corners_redistorted[:, -1] = 1 - corners_redistorted[:, -1] #normalize to pupil norm space corners_nulldistorted.shape = -1, 2 corners_nulldistorted /= camera_calibration['resolution'] corners_nulldistorted[:, -1] = 1 - corners_nulldistorted[:, -1] # maps for extreme lens distortions will behave irratically beyond the image bounds # since our surfaces often extend beyond the screen we need to interpolate # between a distored projection and undistored one. 
# def ratio(val): # centered_val = abs(.5 - val) # # signed distance to img cennter .5 is imag bound # # we look to interpolate between .7 and .9 # inter = max() corners_robust = [] for nulldist, redist in zip(corners_nulldistorted, corners_redistorted): if -.4 < nulldist[0] < 1.4 and -.4 < nulldist[1] < 1.4: corners_robust.append(redist) else: corners_robust.append(nulldist) corners_robust = np.array(corners_robust) if self.old_corners_robust is not None and np.mean( np.abs(corners_robust - self.old_corners_robust)) < 0.02: smooth_corners_robust = self.old_corners_robust smooth_corners_robust += .5 * (corners_robust - self.old_corners_robust) corners_robust = smooth_corners_robust self.old_corners_robust = smooth_corners_robust else: self.old_corners_robust = corners_robust #compute a perspective thransform from from the marker norm space to the apparent image. # The surface corners will be at the right points # However the space between the corners may be distored due to distortions of the lens, m_to_screen = m_verts_to_screen(corners_robust) m_from_screen = m_verts_from_screen(corners_robust) camera_pose_3d = None if locate_3d: dist_coef, = camera_calibration['dist_coefs'] img_size = camera_calibration['resolution'] K = camera_calibration['camera_matrix'] # 3d marker support pose estimation: # scale normalized object points to world space units (think m,cm,mm) uv.shape = -1, 2 uv *= [self.real_world_size['x'], self.real_world_size['y']] # convert object points to lie on z==0 plane in 3d space uv3d = np.zeros((uv.shape[0], uv.shape[1] + 1)) uv3d[:, :-1] = uv xy.shape = -1, 1, 2 # compute pose of object relative to camera center is3dPoseAvailable, rot3d_cam_to_object, translate3d_cam_to_object = cv2.solvePnP( uv3d, xy, K, dist_coef, flags=cv2.SOLVEPNP_EPNP) if is3dPoseAvailable: # not verifed, potentially usefull info: http://stackoverflow.com/questions/17423302/opencv-solvepnp-tvec-units-and-axes-directions ###marker posed estimation from virtually projected points. # object_pts = np.array([[[0,0],[0,1],[1,1],[1,0]]],dtype=np.float32) # projected_pts = cv2.perspectiveTransform(object_pts,self.m_to_screen) # projected_pts.shape = -1,2 # projected_pts *= img_size # projected_pts.shape = -1, 1, 2 # # scale object points to world space units (think m,cm,mm) # object_pts.shape = -1,2 # object_pts *= self.real_world_size # # convert object points to lie on z==0 plane in 3d space # object_pts_3d = np.zeros((4,3)) # object_pts_3d[:,:-1] = object_pts # self.is3dPoseAvailable, rot3d_cam_to_object, translate3d_cam_to_object = cv2.solvePnP(object_pts_3d, projected_pts, K, dist_coef,flags=cv2.CV_EPNP) # transformation from Camera Optical Center: # first: translate from Camera center to object origin. # second: rotate x,y,z # coordinate system is x,y,z where z goes out from the camera into the viewed volume. # print rot3d_cam_to_object[0],rot3d_cam_to_object[1],rot3d_cam_to_object[2], translate3d_cam_to_object[0],translate3d_cam_to_object[1],translate3d_cam_to_object[2] #turn translation vectors into 3x3 rot mat. rot3d_cam_to_object_mat, _ = cv2.Rodrigues( rot3d_cam_to_object) #to get the transformation from object to camera we need to reverse rotation and translation translate3d_object_to_cam = -translate3d_cam_to_object # rotation matrix inverse == transpose rot3d_object_to_cam_mat = rot3d_cam_to_object_mat.T # we assume that the volume of the object grows out of the marker surface and not into it. 
                # We thus have to flip the z-axis:
                flip_z_axix_hm = np.eye(4, dtype=np.float32)
                flip_z_axix_hm[2, 2] = -1
                # create a homogeneous transformation matrix from the rotation matrix
                rot3d_object_to_cam_hm = np.eye(4, dtype=np.float32)
                rot3d_object_to_cam_hm[:-1, :-1] = rot3d_object_to_cam_mat
                # create a homogeneous transformation matrix from the translation vector
                translate3d_object_to_cam_hm = np.eye(4, dtype=np.float32)
                translate3d_object_to_cam_hm[:-1, -1] = translate3d_object_to_cam.reshape(3)

                # combine all transformations into one matrix that describes the move from
                # object origin and orientation to camera origin and orientation
                tranform3d_object_to_cam = (np.matrix(flip_z_axix_hm)
                                            * np.matrix(rot3d_object_to_cam_hm)
                                            * np.matrix(translate3d_object_to_cam_hm))
                camera_pose_3d = tranform3d_object_to_cam

        if detected == False:
            camera_pose_3d = None
            m_from_screen = None
            m_to_screen = None
            m_from_undistored_norm_space = None
            m_to_undistored_norm_space = None

    else:
        detected = False
        camera_pose_3d = None
        m_from_screen = None
        m_to_screen = None
        m_from_undistored_norm_space = None
        m_to_undistored_norm_space = None

    return {
        'detected': detected,
        'detected_markers': len(overlap),
        'm_from_undistored_norm_space': m_from_undistored_norm_space,
        'm_to_undistored_norm_space': m_to_undistored_norm_space,
        'm_from_screen': m_from_screen,
        'm_to_screen': m_to_screen,
        'camera_pose_3d': camera_pose_3d
    }
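# The object-to-camera inversion above (transpose the rotation, negate the translation,
# then compose homogeneous matrices) is easy to lose track of inline. The helper below is
# a minimal standalone sketch of the same idea, assuming a plain rvec/tvec pair returned
# by cv2.solvePnP; it is not part of the original tracker and omits the marker-specific
# z-axis flip.
def object_to_cam_pose(rvec, tvec):
    # 3x3 rotation matrix from the Rodrigues rotation vector (camera <- object)
    R_cam_to_obj, _ = cv2.Rodrigues(rvec)
    # the inverse of a rotation matrix is its transpose
    R_obj_to_cam = R_cam_to_obj.T
    # the translation must be expressed in the rotated frame: t' = -R.T @ t
    t_obj_to_cam = -R_obj_to_cam @ tvec.reshape(3, 1)
    # pack into a 4x4 homogeneous transform; this equals the product of the
    # rotation-only and translation-only homogeneous matrices built above
    T = np.eye(4, dtype=np.float64)
    T[:3, :3] = R_obj_to_cam
    T[:3, 3] = t_obj_to_cam.ravel()
    return T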
def pose_estimator(self, frame):
    # convert the frame to grayscale, blur it, and detect edges
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (7, 7), 0)
    edged = cv2.Canny(blurred, 50, 150)

    # find contours in the edge map
    # print cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    (_, cnts, _) = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    Area = 0
    # loop over the contours
    for c in cnts:
        # approximate the contour
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.01 * peri, True)

        # ensure that the approximated contour is "roughly" rectangular
        if len(approx) >= 4 and len(approx) <= 6:
            # compute the bounding box of the approximated contour and
            # use the bounding box to compute the aspect ratio
            (x1, y1, w, h) = cv2.boundingRect(approx)
            # print x1
            # print y1
            aspectRatio = w / float(h)

            # compute the solidity of the original contour
            area = cv2.contourArea(c)
            Area = max(area, Area)  # keep track of the largest contour area seen so far
            # print Area, "area"
            hullArea = cv2.contourArea(cv2.convexHull(c))
            solidity = area / float(hullArea)

            # compute whether or not the width and height, solidity, and
            # aspect ratio of the contour fall within appropriate bounds
            keepDims = w > 25 and h > 25
            keepSolidity = solidity > 0.9
            keepAspectRatio = aspectRatio >= 0.8 and aspectRatio <= 1.2

            # ensure that the contour passes all our tests
            if keepDims and keepSolidity and keepAspectRatio:
                # draw an outline around the target and update the status text
                cv2.drawContours(frame, [approx], -1, (0, 0, 255), 4)
                status = "Target(s) Acquired"

                # this gives the pixel locations of the rectangular box corners
                rc = cv2.minAreaRect(approx[:])
                box = cv2.boxPoints(rc)
                pt = []
                for p in box:
                    val = (p[0], p[1])
                    # print val
                    pt.append(val)
                    # print pt, 'pt'
                    # cv2.circle(frame, val, 5, (200, 0, 0), 2)
                # print 'came to 2'

                M = cv2.moments(approx)
                (cX, cY) = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))
                (startX, endX) = (int(cX - (w * 0.15)), int(cX + (w * 0.15)))
                (startY, endY) = (int(cY - (h * 0.15)), int(cY + (h * 0.15)))
                cv2.line(frame, (startX, cY), (endX, cY), (0, 255, 0), 3)
                cv2.line(frame, (cX, startY), (cX, endY), (0, 255, 0), 3)
                # print cX, "cxxx"
                # print cY, "cyyyy"
                x_value = cX
                print(x_value, 'cX')
                # print x_value, "xfierst"

                # perspective
                rows, cols, ch = frame.shape
                pts1 = np.float32([pt[1], pt[0], pt[2], pt[3]])
                # print pts1
                pts2 = np.float32([[0, 0], [640, 0], [0, 480], [640, 480]])
                M = None
                # try:
                M = cv2.getPerspectiveTransform(pts1, pts2)
                # except:
                #     print "Perspective failed"
                # print M
                ret, mtxr, mtxq, qx, qy, qz = cv2.RQDecomp3x3(M)
                # print qx
                C = M[2, 1] * M[2, 2]
                D = M[1, 0] * M[0, 0]
                Theta_xp = math.degrees(math.atan(C) * 2)
                # Theta_y = math.degrees(math.atan(C) * 2)
                Theta_zp = math.degrees(math.atan(D) * 2)
                # print Theta_zp, 'z'
                # print Theta_xp, 'x'
                dst = cv2.warpPerspective(frame, M, (640, 480))

                # 2D image points. If you change the image, you need to change this vector
                image_points = np.array([pt], dtype="double")
                # print image_points
                size = frame.shape

                # 3D model points.
                model_points = np.array([
                    (0.0, 0.0, 0.0),    # rectangle corner (origin)
                    (0.0, 13.6, 0.0),
                    (13.6, 13.6, 0.0),
                    (13.6, 0.0, 0.0),
                ])

                # camera intrinsic parameters
                focal_length = size[1]
                center = (size[1] / 2, size[0] / 2)
                camera_matrix = np.array(
                    [[focal_length, 0, center[0]],
                     [0, focal_length, center[1]],
                     [0, 0, 1]], dtype="double")
                # print "Camera Matrix :\n {0}".format(camera_matrix)

                criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
                dist_coeffs = np.zeros((4, 1))  # assuming no lens distortion
                # note: cv2.solvePnP does not take termination criteria, so it is not passed here
                (success, rotation_vector, translation_vector) = cv2.solvePnP(
                    model_points, image_points, camera_matrix, dist_coeffs)
                rotationMatrix = np.zeros((3, 3), np.float32)
                # print "Rotation Vector:\n {0}".format(rotation_vector)
                # print "Translation Vector:\n {0}".format(translation_vector)

                # Rodrigues converts the rotation vector into a rotation matrix
                dst, jacobian = cv2.Rodrigues(rotation_vector)
                # print "Rotation matrix:\n {0}".format(dst)
                # sy = math.sqrt(dst[0, 0] * dst[0, 0] + dst[1, 0] * dst[1, 0])
                A = dst[2, 1] * dst[2, 2]
                B = dst[1, 0] * dst[0, 0]
                # C = -dst[2, 0] * sy
                # Euler angles
                Theta_x = math.degrees(math.atan(A) * 2)
                # Theta_y = math.degrees(math.atan(C) * 2)
                Theta_z = math.degrees(math.atan(B) * 2)
                # print "Roll axis:\n {0}".format(Theta_x)
                # print "Yaw axis:\n {0}".format(Theta_z)

                (img_pts, jacobian) = cv2.projectPoints(
                    np.array([(0.0, 0.0, 1000.0)]), rotation_vector,
                    translation_vector, camera_matrix, dist_coeffs)
                # print img_pts
                # draw(frame, corners=pt, imgpts=img_pts)
                # if translation_vector[2,0] > 160 and translation_vector[2,0] < 180:
                #     print "go straight"
                # cv2.line(frame, (200, 0), (200, 480), (0, 255, 0), 2)
                # cv2.line(frame, (440, 0), (440, 480), (0, 255, 0), 2)
                # print translation_vector[2,0]
                self.testPID(1.0, 1.0, 0)

                # draw the status text on the frame
                cv2.putText(frame, status, (20, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # cv2.line(frame, (320, 0), (320, 480), (255, 0, 0), 2)
    # cv2.line(frame, (260, 0), (260, 480), (0, 255, 0), 2)
    # cv2.line(frame, (380, 0), (380, 480), (0, 255, 0), 2)
    # show the frame and record if a key is pressed
    cv2.imshow("Frame", frame)
    cv2.waitKey(1)
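# The Theta_x / Theta_z values above come from an ad-hoc atan of products of rotation-matrix
# entries; the commented-out `sy = ...` line hints at the conventional decomposition. Below is
# a minimal sketch of that standard ZYX Euler-angle extraction, useful for sanity-checking the
# angles; the function name is ours and it is not part of the original script.
import math


def rotation_matrix_to_euler_degrees(R):
    # R is a 3x3 rotation matrix, e.g. the `dst` returned by cv2.Rodrigues above
    sy = math.sqrt(R[0, 0] ** 2 + R[1, 0] ** 2)
    if sy > 1e-6:
        roll = math.atan2(R[2, 1], R[2, 2])   # rotation about x
        pitch = math.atan2(-R[2, 0], sy)      # rotation about y
        yaw = math.atan2(R[1, 0], R[0, 0])    # rotation about z
    else:
        # gimbal lock: pitch is near +/-90 degrees and yaw is not observable
        roll = math.atan2(-R[1, 2], R[1, 1])
        pitch = math.atan2(-R[2, 0], sy)
        yaw = 0.0
    return math.degrees(roll), math.degrees(pitch), math.degrees(yaw)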
def main(source=0):
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(PREDICTOR_PATH)

    cap = cv2.VideoCapture(source)
    while True:
        GAZE = "Face Not Found"
        ret, img = cap.read()
        if not ret:
            print(f"[ERROR - System] Cannot read from source: {source}")
            break

        faces = detector(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), 0)
        face3Dmodel = world.ref3DModel()

        for face in faces:
            shape = predictor(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), face)
            draw(img, shape)

            refImgPts = world.ref2dImagePoints(shape)
            height, width, channels = img.shape
            focalLength = args.focal * width
            cameraMatrix = world.cameraMatrix(focalLength, (height / 2, width / 2))
            mdists = np.zeros((4, 1), dtype=np.float64)

            # calculate rotation and translation vectors using solvePnP
            success, rotationVector, translationVector = cv2.solvePnP(
                face3Dmodel, refImgPts, cameraMatrix, mdists)

            noseEndPoints3D = np.array([[0, 0, 1000.0]], dtype=np.float64)
            noseEndPoint2D, jacobian = cv2.projectPoints(
                noseEndPoints3D, rotationVector, translationVector, cameraMatrix, mdists)

            # draw the nose line
            p1 = (int(refImgPts[0, 0]), int(refImgPts[0, 1]))
            p2 = (int(noseEndPoint2D[0, 0, 0]), int(noseEndPoint2D[0, 0, 1]))
            cv2.line(img, p1, p2, (110, 220, 0), thickness=2, lineType=cv2.LINE_AA)

            # calculate Euler angles
            rmat, jac = cv2.Rodrigues(rotationVector)
            angles, mtxR, mtxQ, Qx, Qy, Qz = cv2.RQDecomp3x3(rmat)

            if angles[1] < -15:
                GAZE = "Looking: Left"
            elif angles[1] > 15:
                GAZE = "Looking: Right"
            else:
                GAZE = "Forward"

        cv2.putText(img, GAZE, (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 80), 2)
        cv2.imshow("Head Pose", img)

        key = cv2.waitKey(10) & 0xFF
        if key == 27:
            break

    cap.release()
    cv2.destroyAllWindows()
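# The demo above relies on names defined elsewhere in its repository: PREDICTOR_PATH,
# the `world` helper module, `draw`, and a parsed `args.focal`. The entry point below is
# only a hedged guess at how those pieces might be wired together; the flag names, default
# values and predictor filename are assumptions, not taken from the original source.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="head-pose estimation demo (assumed wiring)")
    parser.add_argument("--source", default=0, help="camera index or video file")
    parser.add_argument("--focal", type=float, default=1.0,
                        help="focal-length multiplier; focalLength = focal * image width")
    args = parser.parse_args()

    # dlib's standard 68-landmark model; the path is an assumption
    PREDICTOR_PATH = "shape_predictor_68_face_landmarks.dat"
    main(args.source)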
mtx3 = [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]
mtx = np.array([mtx1, mtx2, mtx3])

mp1 = (-83.75, -47.78, 0)
mp2 = (83.75, -47.78, 0)
mp3 = (0, 145.06, 0)
mp4 = (0, 0, 0)
markerPoints = np.array([mp1, mp2, mp3, mp4])

# initial position
# 522.jpg
mpp1 = (418.0, 814.0)
mpp2 = (693.0, 809.0)
mpp3 = (552.0, 573.0)
mpp4 = (554.0, 732.0)
markerPointsonPC = np.array([mpp1, mpp2, mpp3, mpp4])

ret, rvec, tvec = cv2.solvePnP(markerPoints, markerPointsonPC, mtx, dist_coeffs)
R = cv2.Rodrigues(rvec)[0]
proj_matrix = np.hstack((R, tvec))
euler_angle_begin = cv2.decomposeProjectionMatrix(proj_matrix)[6]
print("XRbegin, YRbegin, ZRbegin")
print(str(euler_angle_begin[0]), str(euler_angle_begin[1]), str(euler_angle_begin[2]))
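# Sketch (not part of the original script): the rvec -> Euler-angle step above can be
# wrapped in a helper so the initial pose can be compared with a later frame. The name
# markerPointsonPC_later below is hypothetical and stands for image points measured in
# that later frame.
def euler_from_pose(rvec, tvec):
    R = cv2.Rodrigues(rvec)[0]
    proj_matrix = np.hstack((R, tvec))
    # decomposeProjectionMatrix returns the Euler angles (in degrees) as its 7th output
    return cv2.decomposeProjectionMatrix(proj_matrix)[6]

# ret, rvec_now, tvec_now = cv2.solvePnP(markerPoints, markerPointsonPC_later, mtx, dist_coeffs)
# rotation_delta = euler_from_pose(rvec_now, tvec_now) - euler_angle_begin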
# detect markers
corners, ids, rejectedimgpoints = cv2.aruco.detectMarkers(
    image, markers, corners, None, None, None, cameraMatrix, distCoeffs)

# check if any markers are present
if len(corners) > 0:
    # estimate pose of each marker
    rvecs, tvecs, _objPoints = cv2.aruco.estimatePoseSingleMarkers(
        corners, markerWidth, cameraMatrix, distCoeffs)
    # draw detected markers if any are present
    imageCopy = cv2.aruco.drawDetectedMarkers(imageCopy, corners, ids)

    # iterate over markers (the list of rvecs has the same length as the other per-marker lists)
    for i in range(len(rvecs)):
        rvec = np.matrix(rvecs[i]).reshape((3, 1))
        tvec = np.matrix(tvecs[i]).reshape((3, 1))
        R, _ = cv2.Rodrigues(rvec)
        R = np.matrix(R).T
        tvec = np.dot(-R, np.matrix(tvec))
        tvec = np.asarray(tvec)
        # invert the perspective

        # get the attitude angles; cv2.Rodrigues returns (matrix, jacobian), so pass only the matrix
        angle = yawpitchrolldecomposition(cv2.Rodrigues(rvecs[i])[0])
        # draw marker
        # imageCopy = cv2.aruco.drawAxis(imageCopy, cameraMatrix, distCoeffs, rvecs[i], tvecs[i], markerWidth / 2)

        # print marker positions
        valuesraw = [
            angle[0], angle[1], angle[2], tvec[0, 0], tvec[1, 0], tvec[2, 0]