class Estimator:
    """Estimates the pose of a single AprilTag (family tag36h11)."""

    def __init__(self, intrinsics, tag_size):
        """Build the detector from camera intrinsics and the physical tag size.

        Args:
            intrinsics: object exposing fx, fy, ppx, ppy (pixels).
            tag_size: tag edge length, in the same units the pose should use.
        """
        self._detector = Detector(
            families="tag36h11",
            nthreads=4,
            quad_decimate=1.0,
            quad_sigma=0.0,
            refine_edges=1,
            decode_sharpening=0.5,
            debug=0,
        )
        # fx, fy, ppx, ppy -- the order Detector.detect() expects.
        self._camera_params = [
            intrinsics.fx,
            intrinsics.fy,
            intrinsics.ppx,
            intrinsics.ppy,
        ]
        self._tag_size = tag_size

    def cube_pose(self, image):
        """Return (translation, yaw) for the tag, or None unless exactly one tag is seen.

        Args:
            image: grayscale image passed straight to the detector.

        Returns:
            (pose_t, euler_z) for the single detection, else None.
        """
        detections = self._detector.detect(
            image,
            estimate_tag_pose=True,
            camera_params=self._camera_params,
            tag_size=self._tag_size,
        )
        # Ambiguous (0 or >1 tags) frames are rejected outright.
        if len(detections) != 1:
            return None
        tag = detections[0]
        yaw = mat2euler(tag.pose_R)[2]
        return tag.pose_t, yaw
class Vision:
    """Wraps the Raspberry Pi camera and an AprilTag detector for continuous detection."""

    def __init__(self, camera_resolution=(640, 480), camera_framerate=32):
        """Configure the camera and load detection settings from the global `params`."""
        self.camera = PiCamera()
        self.camera.resolution = camera_resolution
        self.camera.framerate = camera_framerate
        self.capture = PiRGBArray(self.camera, size=camera_resolution)
        self.detector = Detector(families="tag36h11")
        self.show_image = False
        # Detection configuration comes from the shared params mapping.
        vision_settings = params['vision_settings']
        self.tag_size = vision_settings['tag_size']
        self.lens_size = vision_settings['lens_size']
        self.camera_x_center = vision_settings['camera_x_center']
        self.camera_y_center = vision_settings['camera_y_center']

    def capture_continuous(self, format="bgr", use_video_port=True):
        """Return a generator yielding AprilTag detections for each camera frame."""
        def frames():
            stream = self.camera.capture_continuous(
                self.capture, format=format, use_video_port=use_video_port)
            for frame in stream:
                # flip(-1) rotates the image 180 degrees (both axes flipped).
                flipped = cv2.flip(frame.array, -1)
                # Reset the capture buffer before the next frame.
                self.capture.truncate(0)
                yield self.detect(flipped)
        return frames()

    def detect(self, img):
        """Run grayscale AprilTag detection with pose estimation on a BGR image."""
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # lens_size serves as both fx and fy -- assumes square pixels.
        intrinsics = (self.lens_size, self.lens_size,
                      self.camera_x_center, self.camera_y_center)
        return self.detector.detect(gray,
                                    estimate_tag_pose=True,
                                    camera_params=intrinsics,
                                    tag_size=self.tag_size)
# Detect AprilTags in every JPEG under args.input and write annotated
# copies into an adjacent 'detected' subdirectory.
at_detector = Detector(families='tag36h11',
                       nthreads=1,
                       quad_decimate=1.0,
                       quad_sigma=0.0,
                       refine_edges=1,
                       decode_sharpening=0.25,
                       debug=0)

imgs = glob.glob(args.input + '/*.jpg')
if not os.path.exists(args.input + '/detected'):
    os.makedirs(args.input + '/detected')

for img_f in imgs:
    img = cv2.imread(img_f, cv2.IMREAD_GRAYSCALE)
    tags = at_detector.detect(img,
                              estimate_tag_pose=False,
                              camera_params=None,
                              tag_size=None)
    for tag in tags:
        # Close the corner polygon by repeating the first vertex, then
        # reshape to the (N, 1, 2) layout cv2.polylines expects.
        outline = tag.corners.astype(int)
        outline = np.append(outline, [outline[0]], axis=0).reshape((-1, 1, 2))
        cv2.polylines(img, [outline], False, (0, 0, 0), 20)
    out_path = os.path.join(os.path.dirname(img_f), 'detected',
                            f'{os.path.basename(img_f)}_detected.jpg')
    print(f'Writing image to {out_path}')
    cv2.imwrite(out_path, img)
class DetectObject:
    """ROS node helper: runs YOLOv3 object detection and AprilTag pose
    estimation on Intel RealSense frames and publishes the results.

    Topics:
        subscribes 'detect_item' (Int32)      -> targetClassCB
        publishes  'detect_item_result' (PointStamped)
        subscribes 'apriltag_request' (Int32) -> apriltag_switch
        publishes  'apriltag_pose_result' (PoseStamped)
    """

    def __init__(self):
        # YOLO class index to look for; overwritten by each 'detect_item' message.
        self.targetclass = opt.targetclass
        # Requested AprilTag id; overwritten by each 'apriltag_request' message.
        self.switch = 1
        self.__target_class_sub_ = rospy.Subscriber('detect_item',
                                                    std_msgs.msg.Int32,
                                                    self.targetClassCB,
                                                    queue_size=5)
        self.__target_position_pub_ = rospy.Publisher(
            'detect_item_result', geometry_msgs.msg.PointStamped, queue_size=5)
        self.__apriltag_switch_sub_ = rospy.Subscriber('apriltag_request',
                                                       std_msgs.msg.Int32,
                                                       self.apriltag_switch,
                                                       queue_size=5)
        self.__apriltag_pose_pub_ = rospy.Publisher(
            'apriltag_pose_result', geometry_msgs.msg.PoseStamped, queue_size=5)
        # RealSense: 1280x720 depth (z16) and color (bgr8), both at 30 fps.
        self.__pipeline = rs.pipeline()
        config = rs.config()
        config.enable_stream(rs.stream.depth, 1280, 720, rs.format.z16, 30)
        config.enable_stream(rs.stream.color, 1280, 720, rs.format.bgr8, 30)
        # Start streaming
        self.__pipe_profile = self.__pipeline.start(config)
        self.at_detector = Detector(families='tag36h11',
                                    nthreads=4,
                                    quad_decimate=1.0,
                                    quad_sigma=0.0,
                                    refine_edges=1,
                                    decode_sharpening=0.25,
                                    debug=0)
        # Last AprilTag detection results; cleared after each publish cycle.
        self.tags = []

    def __del__(self):
        # Close the camera
        self.__pipeline.stop()

    def targetClassCB(self, msg):
        # Receive target class index and start one detection pass
        # (no_grad: inference only, no autograd bookkeeping).
        self.targetclass = msg.data
        rospy.loginfo('got target class index:%d', self.targetclass)
        with torch.no_grad():
            self.__detect()
        return

    def __detect(self, save_img=False):
        """Grab one aligned RealSense frame pair, run YOLO on the color image
        and publish the 3-D point of the first detection that matches
        self.targetclass (frame 'camera_color_optical_frame').  On failure a
        PointStamped with frame_id 'empty' is published instead.
        """
        # Pointcloud object, for calculating pointclouds and texture mappings.
        pc = rs.pointcloud()
        # rs.align aligns depth frames to the color stream.
        align_to = rs.stream.color
        align = rs.align(align_to)
        frames = self.__pipeline.wait_for_frames()
        aligned_frames = align.process(frames)
        depth_frame = aligned_frames.get_depth_frame()
        color_frame = aligned_frames.get_color_frame()
        img_color = np.array(color_frame.get_data())
        img_depth = np.array(depth_frame.get_data())
        # The color frame is written to disk and re-read by the dataloader below.
        cv2.imwrite(opt.source, img_color)

        # --- detection section ---
        img_size = (
            320, 192
        ) if ONNX_EXPORT else opt.img_size  # (320, 192) or (416, 256) or (608, 352) for (height, width)
        out, source, weights, half, view_img, save_txt = opt.output, opt.source, opt.weights, opt.half, opt.view_img, opt.save_txt
        webcam = source == '0' or source.startswith(
            'rtsp') or source.startswith('http') or source.endswith('.txt')

        # Initialize
        device = torch_utils.select_device(
            device='cpu' if ONNX_EXPORT else opt.device)
        if os.path.exists(out):
            shutil.rmtree(out)  # delete output folder
        os.makedirs(out)  # make new output folder

        # Initialize model
        model = Darknet(opt.cfg, img_size)

        # Load weights
        attempt_download(weights)
        if weights.endswith('.pt'):  # pytorch format
            model.load_state_dict(
                torch.load(weights, map_location=device)['model'])
        else:  # darknet format
            load_darknet_weights(model, weights)

        # Second-stage classifier (currently disabled).
        classify = False
        if classify:
            modelc = torch_utils.load_classifier(name='resnet101',
                                                 n=2)  # initialize
            modelc.load_state_dict(
                torch.load('weights/resnet101.pt',
                           map_location=device)['model'])  # load weights
            modelc.to(device).eval()

        # Eval mode
        model.to(device).eval()

        # Export mode: dump the network to ONNX, validate it, and stop.
        if ONNX_EXPORT:
            model.fuse()
            img = torch.zeros((1, 3) + img_size)  # (1, 3, 320, 192)
            f = opt.weights.replace(opt.weights.split('.')[-1],
                                    'onnx')  # *.onnx filename
            torch.onnx.export(model, img, f, verbose=False, opset_version=11)

            # Validate exported model
            import onnx
            model = onnx.load(f)  # Load the ONNX model
            onnx.checker.check_model(model)  # Check that the IR is well formed
            print(onnx.helper.printable_graph(
                model.graph))  # Human readable representation of the graph
            return

        # Half precision
        half = half and device.type != 'cpu'  # only supported on CUDA
        if half:
            model.half()

        # Set Dataloader
        vid_path, vid_writer = None, None
        if webcam:
            view_img = True
            torch.backends.cudnn.benchmark = True  # speeds up constant image size inference
            dataset = LoadStreams(source, img_size=img_size)
        else:
            save_img = True
            dataset = LoadImages(source, img_size=img_size)

        # Get names and colors
        names = load_classes(opt.names)
        colors = [[random.randint(0, 255) for _ in range(3)]
                  for _ in range(len(names))]

        # Run inference
        t0 = time.time()
        _ = model(torch.zeros(
            (1, 3, img_size, img_size),
            device=device)) if device.type != 'cpu' else None  # warm-up run
        for path, img, im0s, vid_cap in dataset:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            t1 = torch_utils.time_synchronized()
            pred = model(img, augment=opt.augment)[0]
            t2 = torch_utils.time_synchronized()

            # to float
            if half:
                pred = pred.float()

            # Apply NMS
            pred = non_max_suppression(pred,
                                       opt.conf_thres,
                                       opt.iou_thres,
                                       multi_label=False,
                                       classes=opt.classes,
                                       agnostic=opt.agnostic_nms)

            # Apply Classifier
            if classify:
                pred = apply_classifier(pred, modelc, img, im0s)

            # Process detections
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0 = path[i], '%g: ' % i, im0s[i]
                else:
                    p, s, im0 = path, '', im0s

                save_path = str(Path(out) / Path(p).name)
                s += '%gx%g ' % img.shape[2:]  # print string
                position_result = geometry_msgs.msg.PointStamped()
                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                              im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, names[int(c)])  # add to string

                    print('c = ', len(det))
                    target_detected = False
                    # Write results
                    for *xyxy, conf, cls in det:
                        if save_txt:  # Write to file
                            with open(save_path + '.txt', 'a') as file:
                                file.write(
                                    ('%g ' * 6 + '\n') % (*xyxy, cls, conf))

                        if save_img or view_img:  # Add bbox to image
                            label = '%s %.2f' % (names[int(cls)], conf)
                            plot_one_box(xyxy,
                                         im0,
                                         label=label,
                                         color=colors[int(cls)])
                        if int(cls) == self.targetclass:
                            # Bounding-box center pixel.
                            x_center = int((int(xyxy[0]) + int(xyxy[2])) / 2)
                            y_center = int((int(xyxy[1]) + int(xyxy[3])) / 2)
                            # Intrinsics and Extrinsics
                            depth_intrin = depth_frame.profile.as_video_stream_profile(
                            ).intrinsics
                            color_intrin = color_frame.profile.as_video_stream_profile(
                            ).intrinsics
                            depth_to_color_extrin = depth_frame.profile.get_extrinsics_to(
                                color_frame.profile)

                            # Depth scale: converts raw depth values to meters.
                            depth_sensor = self.__pipe_profile.get_device(
                            ).first_depth_sensor()
                            depth_scale = depth_sensor.get_depth_scale()

                            # Map depth to color
                            depth_pixel = [240, 320]  # Random pixel
                            depth_point = rs.rs2_deproject_pixel_to_point(
                                depth_intrin, depth_pixel, depth_scale)
                            color_point = rs.rs2_transform_point_to_point(
                                depth_to_color_extrin, depth_point)
                            color_pixel = rs.rs2_project_point_to_pixel(
                                color_intrin, color_point)

                            pc.map_to(color_frame)
                            points = pc.calculate(depth_frame)
                            vtx = np.array(points.get_vertices())
                            tex = np.array(points.get_texture_coordinates())
                            # Flat point-cloud index of the bbox center
                            # (1280 = configured frame width).
                            pix_num = 1280 * y_center + x_center
                            # NOTE(review): np.float was removed in NumPy >= 1.24;
                            # these calls would need plain float(...) there.
                            point_cloud_value = [
                                np.float(vtx[pix_num][0]),
                                np.float(vtx[pix_num][1]),
                                np.float(vtx[pix_num][2])
                            ]
                            print('point_cloud_value:', point_cloud_value,
                                  names[int(cls)], int(cls))
                            # 0.11 presumably rejects points too close to the
                            # sensor for a valid depth -- TODO confirm threshold.
                            if np.float(vtx[pix_num][2]) > 0.11:
                                position_result.header.frame_id = 'camera_color_optical_frame'
                                position_result.point.x = np.float(
                                    vtx[pix_num][0])
                                position_result.point.y = np.float(
                                    vtx[pix_num][1])
                                position_result.point.z = np.float(
                                    vtx[pix_num][2])
                                self.__target_position_pub_.publish(
                                    position_result)  # publish the result
                                target_detected = True
                                rospy.loginfo('The target has been detected!')
                                break  # only the target class
                    if not target_detected:
                        position_result.header.frame_id = 'empty'
                        self.__target_position_pub_.publish(
                            position_result)  # publish failure topic
                        rospy.logwarn('Fail to detect the target!')
                else:
                    position_result.header.frame_id = 'empty'
                    self.__target_position_pub_.publish(
                        position_result)  # publish failure topic
                    rospy.logwarn('Fail to detect any target!')

                # Print time (inference + NMS)
                print('%sDone. (%.3fs)' % (s, t2 - t1))

                # Stream results
                if view_img:
                    cv2.imshow(p, im0)
                    if cv2.waitKey(1) == ord('q'):  # q to quit
                        raise StopIteration

                # Save results (image with detections)
                if save_img:
                    if dataset.mode == 'images':
                        cv2.imwrite(save_path, im0)
                    else:
                        if vid_path != save_path:  # new video
                            vid_path = save_path
                            if isinstance(vid_writer, cv2.VideoWriter):
                                vid_writer.release(
                                )  # release previous video writer
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                            vid_writer = cv2.VideoWriter(
                                save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                                fps, (w, h))
                        vid_writer.write(im0)

        if save_txt or save_img:
            print('Results saved to %s' % os.getcwd() + os.sep + out)
            if platform == 'darwin':  # MacOS
                os.system('open ' + out + ' ' + save_path)

        print('Done. (%.3fs)' % (time.time() - t0))
        return

    def apriltag_switch(self, msg):
        # 'apriltag_request' callback: tag id 12 is the small (0.03 m) tag;
        # anything else uses the default 0.083 m tag size.
        self.switch = msg.data
        if self.switch == 12:
            rospy.loginfo('begin to detect small tag')
            self.__apriltag_detect(0.03)
        else:
            rospy.loginfo('begin to detect big tag')
            self.__apriltag_detect()
        return

    def __apriltag_detect(self, tag_size=0.083):
        """Detect AprilTags in one color frame and publish the pose of the tag
        whose id equals self.switch; publishes frame_id 'empty' on failure.

        Args:
            tag_size (float): physical tag edge length in metres.
        """
        # Wait for a coherent color frame
        frames = self.__pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        color_image = np.asanyarray(color_frame.get_data())
        # get the camera intrinsics (fx, fy, ppx, ppy)
        color_intrin = color_frame.profile.as_video_stream_profile().intrinsics
        color_intrin_part = [
            color_intrin.fx, color_intrin.fy, color_intrin.ppx,
            color_intrin.ppy
        ]
        # NOTE(review): the stream is bgr8, so COLOR_RGB2GRAY swaps the R/B
        # channel weights; harmless for tag detection but worth confirming.
        gray_image = cv2.cvtColor(color_image, cv2.COLOR_RGB2GRAY)
        self.tags = self.at_detector.detect(gray_image,
                                            estimate_tag_pose=True,
                                            camera_params=color_intrin_part,
                                            tag_size=tag_size)
        if self.tags:
            is_break = False
            for tag in self.tags:
                if tag.tag_id == self.switch:
                    # Rotation matrix -> quaternion (scipy returns x, y, z, w).
                    r = Rota.from_matrix(tag.pose_R)
                    r_quat = r.as_quat()
                    r_pose = tag.pose_t
                    tag_id = tag.tag_id
                    p1 = geometry_msgs.msg.PoseStamped()
                    p1.header.frame_id = 'apriltag_' + str(tag_id)
                    p1.pose.orientation.w = r_quat[3]
                    p1.pose.orientation.x = r_quat[0]
                    p1.pose.orientation.y = r_quat[1]
                    p1.pose.orientation.z = r_quat[2]
                    p1.pose.position.x = r_pose[0]
                    p1.pose.position.y = r_pose[1]
                    p1.pose.position.z = r_pose[2]
                    self.__apriltag_pose_pub_.publish(p1)
                    rospy.loginfo('The target april_tag has been detected!')
                    is_break = True
                    break
                else:
                    continue
            if not is_break:
                # Tags were seen, but none matching the requested id:
                # publish an identity pose tagged 'empty'.
                p2 = geometry_msgs.msg.PoseStamped()
                p2.header.frame_id = 'empty'
                p2.pose.orientation.w = 1
                p2.pose.orientation.x = 0
                p2.pose.orientation.y = 0
                p2.pose.orientation.z = 0
                p2.pose.position.x = 0
                p2.pose.position.y = 0
                p2.pose.position.z = 0
                self.__apriltag_pose_pub_.publish(p2)
                rospy.logwarn('Fail to detect target april_tag!')
        else:
            # No tags at all in this frame.
            p3 = geometry_msgs.msg.PoseStamped()
            p3.header.frame_id = 'empty'
            p3.pose.orientation.w = 1
            p3.pose.orientation.x = 0
            p3.pose.orientation.y = 0
            p3.pose.orientation.z = 0
            p3.pose.position.x = 0
            p3.pose.position.y = 0
            p3.pose.position.z = 0
            self.__apriltag_pose_pub_.publish(p3)
            rospy.logwarn('Fail to detect any april_tags!')
        # Clearing detections was reportedly needed to avoid a segfault.
        self.tags.clear()
        return
import cv2
import matplotlib.pyplot as plt
from pupil_apriltags import Detector
import numpy as np

# Estimate the pose of an AprilTag in a single image and begin building a
# homography from the rotation/translation.
img = cv2.imread('left0002.jpg', cv2.IMREAD_GRAYSCALE)
detector = Detector()

# Astra's intrinsic parameters from ROS
# result = detector.detect(img, estimate_tag_pose = True, camera_params = [570.34222, 570.34222, 319.50000, 239.50000], tag_size = 0.08)

# Astra's intrinsic parameters given by Aman (fx, fy, cx, cy)
result = detector.detect(
    img,
    estimate_tag_pose=True,
    camera_params=[518.56666108, 518.80466479, 329.45801792, 237.05589955],
    tag_size=0.08)

R = result[0].pose_R

# scaling is required in the translation (metres -> millimetres)
T = result[0].pose_t * 1000

# creating intermediate homography matrix H' by replacing 3rd column of R with T
# BUG FIX: pose_t is a (3, 1) column vector, so np.transpose(T) has shape
# (1, 3) and cannot be broadcast into the (3,)-shaped column slice R[:, 2]
# (NumPy raises "could not broadcast input array").  Flatten instead.
R[:, 2] = T.ravel()

# intrinsic matrix of camera
K = np.array([[518.56666108, 0., 329.45801792],
              [0., 518.80466479, 237.05589955],
              [0., 0., 1.]])
# homography matrix
# append to pair list img_pair.append((img_left, img_right)) src = [] # left dst = [] # right detector = Detector(families='tag36h11', nthreads=1, quad_decimate=1.0, quad_sigma=0.0, refine_edges=1, decode_sharpening=0.25, debug=0) for pair in img_pair: left = pair[0] right = pair[1] tag_left = detector.detect(left) tag_right = detector.detect(right) if len(tag_left) != len(tag_right): continue print("%d apriltags have been detected." % len(tag_left)) for tag in tag_left: # cv2.circle(left, tuple(tag.corners[0].astype(int)), 4, (255, 0, 0), 2) # left-top # cv2.circle(left, tuple(tag.corners[1].astype(int)), 4,(255,0,0), 2) # right-top # cv2.circle(left, tuple(tag.corners[2].astype(int)), 4,(255,0,0), 2) # right-bottom # cv2.circle(left, tuple(tag.corners[3].astype(int)), 4, (255, 0, 0), 2) # left-bottom src.append(tag.corners[0].astype(int)) src.append(tag.corners[1].astype(int)) src.append(tag.corners[2].astype(int)) src.append(tag.corners[3].astype(int)) # cv2.imshow("left", left)
def detect_tags(
    frames_path: str,
    aperture=11,
    visualize=False
) -> Tuple[List[List[Dict[str, Any]]], Dict[int, int]]:
    """Detect all tags (Apriltags3) found in a folder of PNG files and return
    (1) a list of tag objects for preprocessing and (2) a dictionary
    containing the frequency that each tag ID appeared

    Args:
        frames_path (str): path to the directory containing PNG images
        aperture (int): unused here; kept for interface compatibility
        visualize (bool): unused here; kept for interface compatibility

    Returns:
        frames (List[List[Dict[str, Any]]]): per-image list of dicts with
            id (int), centroid (np.array) and corners (np.array)
        tag_ids (Dict[int, int]): tag ID -> number of detections across all images
    """
    # Initialize variables
    frames = []
    tag_ids = defaultdict(int)
    at_detector = Detector()

    # Sort numerically by <index> in .../frame<index>.png
    all_images = sorted(glob(f'{frames_path}/*.png'),
                        key=lambda f: int(os.path.basename(f)[5:-4]))
    # Deleted last image after out of range error popped up
    # TODO: but not analyzing last 2 frames?
    all_images = all_images[:-1]
    num_images = len(all_images)

    print_progress_bar(0,
                       num_images,
                       prefix='Progress:',
                       suffix='Complete',
                       length=50)
    # Iterate thru all PNG images in frames_path
    for i, img_path in enumerate(all_images):
        # Create a grayscale 2D NumPy array for Detector.detect()
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        # FIX: isinstance() instead of `type(img) == np.ndarray` -- also
        # cleanly skips images cv2.imread failed to load (returns None).
        if isinstance(img, np.ndarray):
            tags_in_framex = []
            for tag in at_detector.detect(img):
                # Increment frequency
                tag_ids[tag.tag_id] += 1
                # Add to frames in the following format - feel free to adjust
                tags_in_framex.append({
                    'id': tag.tag_id,
                    'id_confidence': tag.decision_margin,
                    'soft_id': tag.tag_id,
                    'perimeter': 100,  # cv2.arcLength(img, closed=True),
                    'centroid': tag.center,
                    'verts': tag.corners,
                    'frames_since_true_detection': 0
                })
            frames.append(tags_in_framex)
        # NOTE(review): short sleep presumably paces the progress-bar
        # rendering; remove if throughput matters.
        time.sleep(0.01)
        print_progress_bar(i + 1,
                           num_images,
                           prefix='Progress:',
                           suffix='Complete',
                           length=50)
    return frames, dict(tag_ids)
import glob

import cv2  # FIX: cv2 was used below but never imported
from pupil_apriltags import Detector

# Camera intrinsics (pixels) and physical tag size (metres).
fx = 472.13107208
fy = 472.3044311
cx = 322.23325564
cy = 238.25801953
tag_size = 0.07
camera_params = [fx, fy, cx, cy]

at_detector = Detector(families='tag36h11',
                       nthreads=1,
                       quad_decimate=1.0,
                       quad_sigma=0.0,
                       refine_edges=1,
                       decode_sharpening=0.25,
                       debug=0)

images = glob.glob('./sample_data/*.jpg')
for fname in images:
    img = cv2.imread(fname)
    # NOTE(review): intrinsics above presumably correspond to 640x480
    # calibration, hence the resize -- confirm against the calibration data.
    img = cv2.resize(img, (640, 480))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    tags = at_detector.detect(gray,
                              estimate_tag_pose=True,
                              camera_params=camera_params,
                              tag_size=tag_size)
    print(tags)
def camera_callback(self, data):
    """ROS image callback: detect a single AprilTag in the camera image and
    drive the TurtleBot toward it (stop when close); publishes Twist commands.
    """
    cmd_vel_pub = rospy.Publisher('/cmd_vel', Twist, queue_size=1)
    twist_object = Twist()
    # initial cmd_vel
    twist_object.linear.x = 0
    twist_object.angular.z = 0
    # We select bgr8 because its the OpneCV encoding by default
    cv_image = self.bridge_object.imgmsg_to_cv2(data, desired_encoding="bgr8")
    height, width, channels = cv_image.shape

    ## AR TAG DETECTION AND CONTROL
    height, width, channels = cv_image.shape
    frame = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
    # April tag detector (re-created on every frame).
    detector = Detector()
    result = detector.detect(frame)
    tag_detected = False
    cx = 0
    cy = 0
    # Only a single unambiguous detection is acted upon.
    if len(result) == 1:
        tag_detected = True
        tag = result[0].tag_family
        cx = result[0].center[0]
        cy = result[0].center[1]
        corner_points = result[0].corners
        print(cx, cy)
    if len(result) > 1:
        print("Multiple tags detected")
    if tag_detected:
        print("Tag detected")
    global kp, kd, pre_error, temp
    # Control: PD steering on the horizontal offset of the tag center.
    err = 0
    if tag_detected:
        # +250 shifts the setpoint off image center -- TODO confirm why.
        err = cx - width / 2 + 250
        if err > -5 and err < 5:  # avoid steady state oscillation
            err = 0
        twist_object.angular.z = np.clip(
            (-float(err) * kp / 100 + kd * (-err + pre_error) / 100), -0.2,
            0.2)
        twist_object.linear.x = 0.1
        # NOTE(review): `temp` is never assigned in this callback -- this
        # looks like it should be `pre_error = err`; verify.
        pre_error = temp
        print("Mode: Tag following")
    if tag_detected == False:
        # No tag: stop and keep searching.
        twist_object.linear.x = 0
        twist_object.angular.z = 0
        print("Mode: Tag finding")

    # time: measure the interval since the previous callback.
    global t1
    t0 = float(rospy.Time.now().to_sec())
    timestep = (-t1 + t0)
    t1 = t0

    # Show image / distance-based speed control.
    if tag_detected:
        # Corners: index 0 top-left, index 2 bottom-right (opposite corners).
        tlx_point = int(corner_points[0, 0])
        tly_point = int(corner_points[0, 1])
        brx_point = int(corner_points[2, 0])
        bry_point = int(corner_points[2, 1])
        cv_image = cv2.rectangle(cv_image, (tlx_point, tly_point),
                                 (brx_point, bry_point), (0, 165, 255),
                                 1 * int(1000))
        # Tag side length estimated from the diagonal of the bounding box.
        side_length = np.sqrt(
            ((tlx_point - brx_point) * (tlx_point - brx_point) +
             (tly_point - bry_point) * (tly_point - bry_point)) / 2)
        # Empirical pinhole distance estimate -- constants are calibration
        # values (TODO confirm units/derivation).
        z_distance = (75 * 160 / side_length) * 75 / 44  # (160/270)
        msg5 = str("Z distance is " + str(int(z_distance)) + "cm")
        if z_distance < 15:
            twist_object.linear.x = np.clip((float(z_distance - 38) / 100),
                                            -0.1, 0.1)
        if z_distance < 5:
            twist_object.linear.x = 0
            twist_object.angular.z = 0
            print(" Too close to tag, stopped turtlebot")

    if tag_detected:
        print(corner_points)
        print(" Tag side length = ", side_length)
        print(" Tag z disatnce = ", z_distance)
        print(" \n")
    cv2.namedWindow("Apriltag follower", cv2.WINDOW_NORMAL)
    cv2.imshow("Apriltag follower", cv_image)
    cv2.resizeWindow("Apriltag follower", (500, 500))
    cv2.waitKey(1)
    # Debug
    #twist_object.linear.x = 0
    #twist_object.angular.z = 0
    # Make it start turning
    self.moveTurtlebot3_object.move_robot(twist_object)
    print("\n")
class EyeTracker():
    """Interface to a Pupil eye tracker over ZMQ: receives world-camera
    frames and 2-D gaze positions on background threads, detects AprilTags
    in the most recent frame and exposes them in `detected_tags`.
    """

    ##
    # @param self Pointer to the EyeTracker object
    # @param tracker_id int : identifier of the tracker within the Human
    # @param human_id int : identifier of the Human wearing the tracker
    # @param port int : communication port number of the tracker
    #
    # @brief Class constructor
    def __init__(self, tracker_id, human_id, port):
        # Type checks
        assert (type(tracker_id) == int)
        assert (type(human_id) == int)
        assert (type(port) == int)
        # Attributes
        self.id = tracker_id
        self.human_id = human_id
        self.port = port
        self.detected_tags = {}
        self.gaze_in_cam = Vector(0.0, 0.0, 1.0)
        # Front (world) camera parameters:
        #   mtx  : intrinsic matrix
        #   h    : image height in pixels
        #   w    : image width in pixels
        #   dist : image distortion parameters
        self.__mtx = Matrix([[1.600e+03, 0.00000000e+00, 6.40e+02],
                             [0.00000000e+00, 1.600e+03, 3.60e+02],
                             [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])
        #self.__mtx = Matrix([[829.3510515270362, 0.0, 659.9293047259697],[0.0, 799.5709408845464, 373.0776462356668],[0.0, 0.0, 1.0]])
        self.__h = 720
        self.__w = 1280
        self.__dist = np.array([[
            -0.43738542863224966, 0.190570781428104, -0.00125233833830639,
            0.0018723428760170056, -0.039219091259637684
        ]])
        # Communication with the tracker (Pupil network API).
        self.__context = zmq.Context()
        self.__addr = '127.0.0.1'  # remote ip or localhost
        self.__req = self.__context.socket(zmq.REQ)
        self.__req.connect("tcp://{}:{}".format(self.__addr, str(self.port)))
        self.__req.send_string('SUB_PORT')  # ask for the subscription port
        self.__sub_port = self.__req.recv_string()  # store subscription port
        # Start the frame publisher in grayscale format.
        self.notify({
            'subject': 'start_plugin',
            'name': 'Frame_Publisher',
            'args': {
                'format': 'gray'
            }
        })
        # Open a subscription (sub) socket to listen to the tracker (frames).
        self.__sub_frame = self.__context.socket(zmq.SUB)
        self.__sub_frame.connect("tcp://{}:{}".format(self.__addr,
                                                      self.__sub_port))
        # Receive only world-frame notifications.
        self.__sub_frame.setsockopt_string(zmq.SUBSCRIBE, 'frame.world')
        # Open a subscription (sub) socket to listen to the tracker (gaze).
        self.__sub_gaze = self.__context.socket(zmq.SUB)
        self.__sub_gaze.connect("tcp://{}:{}".format(self.__addr,
                                                     self.__sub_port))
        # Receive only gaze notifications.
        self.__sub_gaze.setsockopt_string(zmq.SUBSCRIBE, 'gaze.2d.0')
        # Continuous frame-reader thread.
        self.__frames_reader = threading.Thread(target=self.always_recv_frame,
                                                args=(),
                                                daemon=True)
        self.__frames_lock = threading.Lock()
        self.__available_frame = False
        self.__frames_reader.start()
        # Continuous gaze-reader thread.
        self.__gaze_reader = threading.Thread(target=self.always_recv_gaze,
                                              args=(),
                                              daemon=True)
        self.__gaze_lock = threading.Lock()
        self.__available_gaze = False
        self.__gaze_reader.start()
        # Tag detector.
        self.__at_detector = Detector(families='tag36h11',
                                      nthreads=1,
                                      quad_decimate=1.0,
                                      quad_sigma=0.0,
                                      refine_edges=1,
                                      decode_sharpening=0.25,
                                      debug=0)

    ##
    # @param self Pointer to the EyeTracker object
    #
    # @brief Reads the latest frame received by the 'sub' socket and stores it
    def always_recv_frame(self):
        print("start reader")
        # Runs forever on a daemon thread; errors are deliberately swallowed
        # so a malformed message never kills the reader.
        while (True):
            try:
                topic = self.__sub_frame.recv_string()
                payload = unpackb(self.__sub_frame.recv(), raw=False)
                extra_frames = []
                # Drain any additional multipart frames (raw image data).
                while self.__sub_frame.get(zmq.RCVMORE):
                    extra_frames.append(self.__sub_frame.recv())
                if extra_frames:
                    payload['_raw_data_'] = extra_frames
                with self.__frames_lock:
                    self.__payload = payload
                    self.__available_frame = True
            except:
                pass

    ##
    # @param self Pointer to the EyeTracker object
    #
    # @brief Reads the latest gaze message received by the 'sub' socket and stores it
    def always_recv_gaze(self):
        print("start reader")
        # Runs forever on a daemon thread; errors are deliberately swallowed.
        while (True):
            try:
                topic, payload = self.__sub_gaze.recv_multipart()
                message = msgpack.loads(payload)
                with self.__gaze_lock:
                    self.__normalized_pos = [
                        message[b'norm_pos'][0], message[b'norm_pos'][1]
                    ]
                    self.__available_gaze = True
            except:
                pass

    ##
    # @param self Pointer to the EyeTracker object
    # @param notification dict : the message to send
    #
    # @brief Sends a message to the Eye Tracker API
    def notify(self, notification):
        topic = 'notify.' + notification['subject']
        payload = packb(notification, use_bin_type=True)
        self.__req.send_string(topic, flags=zmq.SNDMORE)
        self.__req.send(payload)
        return self.__req.recv_string()

    ##
    # @param self Pointer to the EyeTracker object
    #
    # @brief Checks the connection with the eye-tracker (not implemented)
    def checkConnection(self):
        pass

    ##
    # @param self Pointer to the EyeTracker object
    # @param size_tag float : real size of a tag (in metres)
    #
    # @brief Analyses the current image
    #
    # Grabs the most recent front-camera frame, detects the tags present and
    # updates the "detected_tags" dict
    def updateFrame(self, size_tag):
        # Grab the frame; bail out if no frame or gaze data is available yet.
        with self.__frames_lock:
            if (self.__available_frame == True):
                msg = self.__payload
            else:
                return
        with self.__gaze_lock:
            if (self.__available_gaze == True):
                norm_pos = self.__normalized_pos
            else:
                return
        # Gaze position in the camera frame (back-projected through the
        # intrinsics; y is flipped since norm_pos origin is bottom-left).
        fx = self.__mtx[0, 0]
        fy = self.__mtx[1, 1]
        cx = self.__mtx[0, 2]
        cy = self.__mtx[1, 2]
        self.gaze_in_cam = Vector(norm_pos[0] * self.__w / fx - cx / fx,
                                  (1 - norm_pos[1]) * self.__h / fy - cy / fy,
                                  1.0)
        # Grayscale front-camera image rebuilt from the raw message bytes.
        recent_world = np.frombuffer(msg['_raw_data_'][0],
                                     dtype=np.uint8,
                                     count=msg['height'] *
                                     msg['width']).reshape(
                                         msg['height'], msg['width'])
        tags = self.__at_detector.detect(recent_world,
                                         estimate_tag_pose=True,
                                         camera_params=[
                                             self.__mtx[0, 0],
                                             self.__mtx[1, 1],
                                             self.__mtx[0, 2],
                                             self.__mtx[1, 2]
                                         ],
                                         tag_size=size_tag)
        # Fill the tag dictionary (keys are sequential indices, not tag ids).
        self.detected_tags = {}
        id_in_dict = 0
        for tag in tags:
            # Characteristics of the detected tag.
            tag_family = str(tag.tag_family)
            tag_family_id = tag.tag_id
            translation = tag.pose_t
            rotation = tag.pose_R
            # Add the tag to the dictionary.
            self.detected_tags[id_in_dict] = DetectedTag(
                tag_family, tag_family_id, id_in_dict,
                Vector(translation[0, 0], translation[1, 0],
                       translation[2, 0]), Matrix(rotation.tolist()))
            # Increment the index.
            id_in_dict += 1

    ##
    # @param self Pointer to the EyeTracker object
    #
    # @brief Displays the current configuration of the EyeTracker
    #
    # Prints the port and the list of detected AprilTags
    def showEyeTracker(self):
        print("")
        print("Eye Tracker - ", self.id)
        print("Human id - ", self.human_id)
        print("Port - ", self.port)
        print("Detected Tags - ", self.detected_tags)
def test(): at_detector = Detector( families="tag36h11", nthreads=1, quad_decimate=1.0, quad_sigma=0.0, refine_edges=1, decode_sharpening=0.5, debug=0, ) camera_params = [ 425.40704345703125, 425.40704345703125, 421.0938720703125, 246.84486389160156 ] tag_size = 0.030 centers = [] angles = [] for i in range(200): image = cv2.imread( f"/scratch/azimov/igor/data/physics/tmp/{i:04d}.png", 0) tags = at_detector.detect( image, estimate_tag_pose=True, camera_params=camera_params, tag_size=tag_size, ) if len(tags) == 1: centers.append(tags[0].pose_t) euler = mat2euler(tags[0].pose_R) print(euler) angles.append(euler[2]) # print(tags) print(f"{i:04d} / {200:04d}: {len(tags)}") for i in range(1, len(angles)): a0 = angles[i - 1] a1 = angles[i] if a0 - a1 > 270 - 30: angles[i] += 270 elif a0 - a1 > 180 - 30: angles[i] += 180 elif a0 - a1 > 90 - 30: angles[i] += 90 elif a1 - a0 > 270 - 30: angles[i] -= 270 elif a1 - a0 > 180 - 30: angles[i] -= 180 elif a1 - a0 > 90 - 30: angles[i] -= 90 times = [i / 90.0 for i in rage(200)] plt.figure() plt.subplot(2, 1, 1) plt.plot(times, [c[0] for c in centers], '-o', label="x") plt.plot(times, [c[1] for c in centers], '-o', label="y") # plt.plot([c[2] for c in centers], '-*', label="z") plt.legend() plt.subplot(2, 1, 2) plt.plot(times, angles, '-o', label="angle") plt.legend() plt.show() data = dict(time=times, center=centers, angle=np.rad2deg(angles).tolist()) json_name = file_name.replace('.bag', '.json') with open(json_name, 'w') as f: json.dump(data, f)
try: while True: topic, msg = recv_from_sub() if topic == 'frame.world': recent_world = np.frombuffer(msg['__raw_data__'][0], dtype=np.uint8,count=msg['height']*msg['width']).reshape(msg['height'], msg['width']) elif topic == 'frame.eye.0': pass #recent_eye0 = np.frombuffer(msg['__raw_data__'][0], dtype=np.uint8).reshape(msg['height'], msg['width']) elif topic == 'frame.eye.1': pass #recent_eye1 = np.frombuffer(msg['__raw_data__'][0], dtype=np.uint8).reshape(msg['height'], msg['width']) if recent_world is not None: if i%50==0 : tags = at_detector.detect(recent_world, estimate_tag_pose=True, camera_params=[542.098,542.128,311.448,217.969], tag_size=0.0305) print(len(tags)) i+=1 cv2.imshow("world", recent_world) if cv2.waitKey(1) & 0xFF == ord('q'): break pass # here you can do calculation on the 3 most recent world, eye0 and eye1 images if cv2.waitKey(1) & 0xFF == ord('p'): print("Photo prise") cv2.imwrite( "C:/Users/gobgr/Desktop/photos_calbration_4/calib_img_"+str(indice)+".jpg", recent_world) indice+=1
def detect_tags_and_surfaces(frames_path: str,
                             createSurfaceFrame=False,
                             tags=None,
                             tags_corner_attribute=None):
    """Detect all tags (Apriltags3) & surfaces found in a folder of PNG files
    and return (1) a list of tag objects for preprocessing, (2) a dictionary
    containing the frequency that each tag ID appeared, (3) a dataframe
    consisting of all surface coordinates associated with each frame

    Args:
        frames_path (str): path to the directory containing PNG images
        createSurfaceFrame (bool): True to create surface_frames directory
            with annotated frames
        tags(List[int]): ids from bottom-left corner, counter-clockwise
            --> 1 surface
        tags_corner_attribute (List[bool]): order corresponds to tags, True
            if tag is corner

    Returns:
        frames (List[Dict[str, Any]]): list of objects containing id (int),
            centroid (np.array[int]) and corners (np.array[int])
        tag_ids (Dict[int, int]): dictionary mapping tag IDs to frequency of
            tag across all images
        coordinates_df (DataFrame): dataframe that lists the coordinates of
            the corners & center
    """
    # Default layout: flags marking which of the ten expected tags sit on
    # surface corners
    if tags_corner_attribute is None:
        tags_corner_attribute = [
            True, False, False, True, False, True, False, False, True, False
        ]
    if tags is None:
        tags = [2, 3, 5, 6, 7, 8, 9, 11, 0, 1]
    if len(tags) != len(tags_corner_attribute):
        logging.warning(
            'tags_corner_attribute variable should match tags to describe if corner'
        )
    # Initialize variables
    frames = []
    tag_ids = defaultdict(int)
    at_detector = Detector()
    img_n_total = []
    # becomes True at the first frame where every expected tag is visible
    starting_frame = False
    img_n = []
    # Normalized surface coordinates, one entry per frame once surfaces
    # start being tracked
    norm_top_left_corner_x = []
    norm_top_right_corner_x = []
    norm_bottom_left_corner_x = []
    norm_bottom_right_corner_x = []
    norm_center_x = []
    norm_top_left_corner_y = []
    norm_top_right_corner_y = []
    norm_bottom_left_corner_y = []
    norm_bottom_right_corner_y = []
    norm_center_y = []
    # Sort by index in.../frame<index>.png
    all_images = sorted(glob(f'{frames_path}/*.png'),
                        key=lambda f: int(os.path.basename(f)[5:-4]))
    # Deleted last image after out of range error popped up
    if len(all_images) > 1:
        all_images = all_images[:-1]
    # Iterate thru all PNG images in frames_path
    for i, img_path in enumerate(all_images):
        # Create a grayscale 2D NumPy array for Detector.detect()
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread returns None for unreadable files; only process real
        # arrays
        if type(img) == np.ndarray:
            tags_in_framex = []
            for tag in at_detector.detect(img):
                tags_in_framex = detect_tags_in_framex(tag_ids, tag,
                                                       tags_in_framex)
            frames.append(tags_in_framex)
            # all frame numbers
            head, tail = os.path.split(img_path)
            img_n_total.append(int(''.join(list(filter(str.isdigit, tail)))))
            # detect surfaces once the first frame with all tags are identified
            if len(tags_in_framex) == len(tags):
                starting_frame = True
            if starting_frame:
                # frame number
                img_n.append(int(''.join(list(filter(str.isdigit, tail)))))
                # define surface
                norm_tl, norm_tr, norm_bl, norm_br, norm_c = norm_surface_coordinates(
                    frames, img_path, tags, tags_corner_attribute,
                    createSurfaceFrame)
                norm_top_left_corner_x.append(norm_tl[0])
                norm_top_right_corner_x.append(norm_tr[0])
                norm_bottom_left_corner_x.append(norm_bl[0])
                norm_bottom_right_corner_x.append(norm_br[0])
                norm_center_x.append(norm_c[0])
                norm_top_left_corner_y.append(norm_tl[1])
                norm_top_right_corner_y.append(norm_tr[1])
                norm_bottom_left_corner_y.append(norm_bl[1])
                norm_bottom_right_corner_y.append(norm_br[1])
                norm_center_y.append(norm_c[1])
        # NOTE(review): per-frame sleep slows processing noticeably; its
        # purpose (progress-bar repaint?) is not evident — confirm.
        time.sleep(0.01)
        print_progress_bar(i + 1,
                           len(all_images),
                           prefix='Progress:',
                           suffix='Complete',
                           length=50)
    # coordinates dataframe output
    surface_coords = {
        'image': img_n,
        'timestamp': detect_timestamp_to_frame(frames_path, img_n_total,
                                               img_n),
        'norm_top_left_x': norm_top_left_corner_x,
        'norm_bottom_left_x': norm_bottom_left_corner_x,
        'norm_bottom_right_x': norm_bottom_right_corner_x,
        'norm_top_right_x': norm_top_right_corner_x,
        'norm_center_x': norm_center_x,
        'norm_top_left_y': norm_top_left_corner_y,
        'norm_bottom_left_y': norm_bottom_left_corner_y,
        'norm_bottom_right_y': norm_bottom_right_corner_y,
        'norm_top_right_y': norm_top_right_corner_y,
        'norm_center_y': norm_center_y
    }
    coordinates_df = pd.DataFrame(surface_coords)
    return frames, dict(tag_ids), coordinates_df
camera.start_preview() print("2 Seconds") sleep(2) camera.capture(stream, 'yuv') # Rewind the stream for reading stream.seek(0) # Calculate the actual image size in the stream (accounting for rounding # of the resolution) fwidth = (RES[0] + 31) // 32 * 32 fheight = (RES[1] + 15) // 16 * 16 # Load the Y (luminance) data from the stream I_distorted = np.fromfile(stream, dtype=np.uint8, count=fwidth * fheight).reshape((fheight, fwidth)) I = cv2.remap(I_distorted, camera_info["map_1"], camera_info["map_2"], interpolation=cv2.INTER_LINEAR) detected_tags = detector.detect(I, estimate_tag_pose=True, camera_params=camera_info["params"], tag_size=tags.size) # Estimate Camera Pose for t in detected_tags: print(f"{t.tag_id}") print(tags.estimate_pose(t.tag_id, t.pose_R, t.pose_t)) # Visualize The Frame cv2.imshow("Visualized Tags", visualize_frame(I, detected_tags)) cv2.waitKey(0) # waits until a key is pressed cv2.destroyAllWindows() # destroys the window showing image
context = zmq.Context() sockImage = context.socket(zmq.SUB) sockImage.connect('tcp://192.168.50.111:5560') sockImage.setsockopt(zmq.RCVHWM, 1) sockImage.subscribe('') sockPublish = context.socket(zmq.PUB) sockPublish.bind('ipc:///home/azazdeaz/mirrors') sockPublish.setsockopt(zmq.CONFLATE, 1) while True: print("Waiting for message") message = sockImage.recv() # time.sleep (1) nparr = np.frombuffer(message, dtype=np.uint8) img = cv2.imdecode(nparr, cv2.IMREAD_GRAYSCALE) tags = at_detector.detect(img, estimate_tag_pose=True, camera_params=camera_params, tag_size=0.08) tags = [conver_detection_to_dict(detection) for detection in tags] response = msgpack.packb({ 'tags': tags, 'image': message }, use_bin_type=True) sockPublish.send(response) print('message sent')
import time t1 = time.time() img = cv2.imread("DockTag2.PNG") print(img.shape) gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) at_detector = Detector(families='tag36h11', nthreads=1, quad_decimate=2.0, quad_sigma=0.0, refine_edges=1, decode_sharpening=0.25, debug=0) results = at_detector.detect(gray, estimate_tag_pose=True, camera_params=[800.5335,801.2600,313.2403,231.1194], tag_size=0.20) for r in results: # extract the bounding box (x, y)-coordinates for the AprilTag # and convert each of the (x, y)-coordinate pairs to integers (ptA, ptB, ptC, ptD) = r.corners ptB = (int(ptB[0]), int(ptB[1])) ptC = (int(ptC[0]), int(ptC[1])) ptD = (int(ptD[0]), int(ptD[1])) ptA = (int(ptA[0]), int(ptA[1])) # draw the bounding box of the AprilTag detection cv2.line(img, ptA, ptB, (0, 255, 0), 2) cv2.line(img, ptB, ptC, (0, 255, 0), 2) cv2.line(img, ptC, ptD, (0, 255, 0), 2) cv2.line(img, ptD, ptA, (0, 255, 0), 2) # draw the center (x, y)-coordinates of the AprilTag
import cv2 import numpy as np from pupil_apriltags import Detector img = cv2.imread('images/track.jpeg') img = cv2.resize(img, (0, 0), fx=0.2, fy=0.2) at_detector = Detector(families='tagStandard41h12', nthreads=1, quad_decimate=1.0, quad_sigma=0.0, refine_edges=1, decode_sharpening=0.25, debug=0) results = at_detector.detect(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)) # img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) result = results[1] points = np.array(result.corners) pts1 = np.float32([points[3], points[2], points[1], points[0]]) # [top left, top right, bottom left, bottom right] pts2 = np.float32([[0, 50], [0, 0], [50, 0], [50, 50]]) # [top left, top right, bottom left, bottom right] # for i, pt in enumerate(pts1): # img = cv2.putText(img, str(i), tuple(pt.astype(np.int)), cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 0, 0), 1) cv2.imshow('img1', img) h = cv2.getPerspectiveTransform(pts1, pts2) img3 = cv2.warpPerspective(img, np.dot(np.array([[1, 0, 25], [0, 1, 25], [0, 0, 1]]), h), (2000, 2000), cv2.INTER_CUBIC) cv2.imshow('img3', img3) """