def __init__(self, params):
    self.params = params
    self.threshold = self.params['reid_thr']

    # Build and compile the re-identification network, then restore weights.
    self.model = self.generate_model()
    self.model = self.compile_model(self.model)
    weight = 'reid_model/reid_weight.h5'
    # self.gg is expected to hold the TensorFlow graph created by generate_model().
    with self.gg.as_default():
        self.model.load_weights(weight)

    self.OBJ_TOPIC = self.params['obj_topic']
    self.TARGET_TOPIC = self.params['reid_target_topic']
    self.PERSON_ID_TOPIC = self.params['reid_topic']
    self.cvbridge = CvBridge()
    self.sub_target = rospy.Subscriber(self.TARGET_TOPIC, objs_array, self.callback_targets, queue_size=1)
    self.sub_obj = rospy.Subscriber(self.OBJ_TOPIC, objs_array, self.callback_objs, queue_size=1)
    self.reid_pub = rospy.Publisher(self.PERSON_ID_TOPIC, objs_array, queue_size=1)
    self.targets = objs_array()
    self.last_id = 0
    print('[DIP] Re-identification module ready!')
def get_captions(self):
    # Publish a captioning request carrying the current RGB frame, then
    # poll for the response for up to 3 seconds.
    msg = objs_array()
    msg.scene_rgb = self.numpyimg_to_rosimg(self.get_rgb())
    self.captioning_req_time = time.time()
    result = ['no description']
    self.captioning_flag = False
    self.pub_cap_req.publish(msg)
    while time.time() - self.captioning_req_time < 3:
        if self.captioning_flag:  # set by callback_captioning_response
            result = self.captioning_result
            break
        rospy.sleep(0.05)  # small sleep to avoid busy-waiting
    self.captioning_flag = False
    return result
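# Usage sketch (a hypothetical call site; assumes the external captioning
# node is up and answering on captioning_response_topic):
#
#   captions = self.get_captions()
#   if captions != ['no description']:
#       print('scene description: {}'.format(captions[0]))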
def get_perception(self, fil=None, reid=True, pose=True, captioning=True):
    objs = self.match_perception(reid, pose, captioning)
    td = time.time() - objs.header.stamp.secs  # message age in seconds (debug)
    if fil is None:
        return objs
    # Otherwise return a copy containing only the requested classes.
    result = objs_array()
    result.header = objs.header
    result.msg_idx = objs.msg_idx
    result.scene_rgb = objs.scene_rgb
    result.scene_cloud = objs.scene_cloud
    for item in objs.objects:
        if item.class_string in fil:
            result.objects.append(item)
    return result
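# Usage sketch (hypothetical; assumes a running DIP instance `dip` whose
# perception loop is being serviced, and COCO-style labels like 'person'):
#
#   people = dip.get_perception(fil=['person'], captioning=False)
#   for p in people.objects:
#       if p.person_id >= 0:
#           print('{} (re-id score {:.2f})'.format(p.person_name, p.reid_score))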
def __init__(self):  # requires ip and port
    self.params = DIP_load_config.load_config()
    rospy.init_node("DIP")
    self.cvbridge = CvBridge()
    self.captioning_keywords = []

    # Rolling history buffers used by match_perception() to align raw
    # detections with re-id, pose, and captioning results by msg_idx.
    self.objs_history = []
    self.objs_history_idx = []
    self.objs_history_max = 10
    self.reid_history = []
    self.reid_history_idx = []
    self.reid_history_max = 3
    self.pose_history = []
    self.pose_history_idx = []
    self.pose_history_max = 3
    self.cap_history = []
    self.cap_history_idx = []
    self.cap_history_max = 3

    self.objects = objs_array()
    self.objects_w_pose = objs_array()
    self.people_identified = objs_array()
    self.reid_targets = objs_array()

    self.sub_obj = rospy.Subscriber(self.params['obj_topic'], objs_array, self.callback_objs, queue_size=1)
    self.sub_obj_w_pose = rospy.Subscriber(self.params['pose_topic'], objs_array, self.callback_pose, queue_size=1)
    self.sub_obj_w_identify = rospy.Subscriber(self.params['reid_topic'], objs_array, self.callback_identify, queue_size=1)
    self.sub_obj_w_cap = rospy.Subscriber(self.params['captioning_topic'], objs_array, self.callback_objs_w_caps, queue_size=1)

    self.reid_target_pub = rospy.Publisher(self.params['reid_target_topic'], objs_array, queue_size=1)
    self.captioning_keywords_pub = rospy.Publisher(self.params['captioning_keywords_topic'], string_array, queue_size=1)
    self.perception_pub = rospy.Publisher(self.params['perception_topic'], objs_array, queue_size=1)
    self.perception = objs_array()

    # Captioning request/response channel.
    self.pub_cap_req = rospy.Publisher(self.params['captioning_request_topic'], objs_array, queue_size=1)
    self.sub_cap_res = rospy.Subscriber(self.params['captioning_response_topic'], objs_array, self.callback_captioning_response)
    self.captioning_req_time = time.time()
    self.captioning_flag = False
    self.captioning_result = []

    if self.params['show_integrated_perception']:
        cv2.startWindowThread()
        cv2.namedWindow('DIP_jychoi')

    # Main loop: integrate and republish perception until shutdown.
    while not rospy.is_shutdown():
        self.process_perception()
def match_perception(self, reid=True, pose=True, captioning=True):
    tic = time.time()
    # Number of auxiliary results (re-id / pose / captions) that must be
    # matched before a frame is considered complete.
    required = int(reid) + int(pose) + int(captioning)
    ooo = self.objs_history[::-1]  # newest frame first
    if len(ooo) == 0:
        return objs_array()
    rrr = self.reid_history[::-1]
    ppp = self.pose_history[::-1]
    ccc = self.cap_history[::-1]
    oooi = self.objs_history_idx[::-1]
    rrri = self.reid_history_idx[::-1]
    pppi = self.pose_history_idx[::-1]
    ccci = self.cap_history_idx[::-1]
    contingency = None  # newest frame that at least has pose results
    for k in range(len(oooi)):
        got = 0
        r, p, c = None, None, None
        if oooi[k] in rrri and reid:
            r = rrr[rrri.index(oooi[k])]
            got += 1
        if oooi[k] in pppi and pose:
            p = ppp[pppi.index(oooi[k])]
            got += 1
        if oooi[k] in ccci and captioning:
            c = ccc[ccci.index(oooi[k])]
            got += 1
        # Merge matched results into each person object of this frame.
        for o in ooo[k].objects:
            if o.class_string != 'person':
                continue
            if p is not None:
                if contingency is None:
                    contingency = p
                for pp in p.objects:
                    if pp.object_id == o.object_id:
                        o.joints = pp.joints
                        o.tags += pp.tags
                        o.pose_wrt_robot = pp.pose_wrt_robot
                        o.pose_wrt_odom = pp.pose_wrt_odom
                        o.pose_wrt_map = pp.pose_wrt_map
            if r is not None:
                for rr in r.objects:
                    if rr.object_id == o.object_id:
                        o.person_id = rr.person_id
                        o.person_name = rr.person_name
                        o.reid_score = rr.reid_score
                        o.tags += rr.tags
            if c is not None:
                for cc in c.objects:
                    if cc.object_id == o.object_id:
                        o.captions = cc.captions
                        o.tags += cc.tags
            o.tags = list(set(o.tags))  # deduplicate
        if got == required:
            return ooo[k]
    # No frame had every requested result; fall back to the newest frame
    # with pose results, else the newest frame overall.
    if contingency is not None:
        return contingency
    return ooo[0]
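# The loop above scans frames newest-first and aligns auxiliary results by
# msg_idx. A minimal standalone illustration of that pattern (toy data, not
# part of this module):
#
#   objs_idx = [5, 6, 7]      # msg_idx of buffered detection frames
#   reid_idx = [5, 7]         # frames for which re-id results arrived
#   for idx in objs_idx[::-1]:             # newest first
#       if idx in reid_idx:
#           match = reid_idx.index(idx)    # position of the aligned result
#           break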
def callback_image(self, msg):
    if self.point_cloud is None:
        print('No point cloud data')
        return None
    tic = time.time()
    img = self.cvbridge.imgmsg_to_cv2(msg, 'bgr8')
    img = cv2.resize(img, (320, 240))
    detections = self.tfnet.return_predict(img)
    img_display = img.copy()

    objarray = objs_array()
    objarray.comm_delay = time.time() - self.tttt
    print('[DIP] Detection. time elapsed : {}'.format(time.time() - self.tttt))
    self.tttt = time.time()
    objarray.header = msg.header
    objarray.header.stamp = rospy.Time.from_sec(time.time())
    objarray.msg_idx = self.msg_idx
    self.msg_idx += 1
    if self.msg_idx > self.msg_idx_max:
        self.msg_idx = 0

    temp = []
    temp_tt = 0
    for i in range(len(detections)):
        obj = objs()
        obj.object_id = self.object_id
        self.object_id += 1
        if self.object_id > self.object_id_max:
            self.object_id = 0
        obj.person_id = -1  # unknown until re-identification runs
        obj.person_name = ''
        obj.class_string = detections[i]['label']
        obj.tags.append(detections[i]['label'])
        if obj.class_string == 'person':
            obj.tags.append('people')

        # The detector reports (x, y) in image coordinates; rows are indexed
        # by y, so the axes are swapped deliberately here.
        tlx = int(detections[i]['topleft']['y'])
        tly = int(detections[i]['topleft']['x'])
        brx = int(detections[i]['bottomright']['y'])
        bry = int(detections[i]['bottomright']['x'])
        x = (tlx + brx) // 2  # box center (row)
        y = (tly + bry) // 2  # box center (column)
        h = (brx - tlx) // 2  # half height
        w = (bry - tly) // 2  # half width
        obj.x = x
        obj.y = y
        obj.h = h
        obj.w = w
        obj.confidence = detections[i]['confidence']
        crop = img[max(0, x - h):min(img.shape[0], x + h),
                   max(0, y - w):min(img.shape[1], y + w)]

        # Sub-classify the crop by histogram comparison against reference
        # images keyed as '<class>_<subclass>_<index>'.
        ttiicc = time.time()
        max_score = -1
        sub_class = None
        for mi in self.match_images.keys():
            mi_spl = mi.split('_')
            mi_cls = mi_spl[0]
            mi_subcls = mi_spl[1]
            mi_idx = mi_spl[2]
            ob_cls = obj.class_string
            if mi_cls in self.class_reroute.keys():
                mi_cls = self.class_reroute[mi_cls]
            if ob_cls in self.class_reroute.keys():
                ob_cls = self.class_reroute[ob_cls]
            if ob_cls != mi_cls:
                continue
            scr = self.compare_hist(crop, self.match_images[mi])
            if max_score < scr:
                max_score = scr
                sub_class = mi_subcls
        temp_tt += time.time() - ttiicc
        if sub_class is not None:
            obj.tags.append(sub_class)

        if self.params['show_od']:
            cv2.rectangle(img_display, (tly, tlx), (bry, brx), (0, 255, 0), 2)
            lbl = detections[i]['label'] if sub_class is None else sub_class
            cv2.putText(img_display, lbl, (tly, tlx - 8), cv2.FONT_HERSHEY_SIMPLEX, 0.3, color=(0, 0, 0), thickness=1)

        obj.cropped = self.cvbridge.cv2_to_imgmsg(crop, 'bgr8')
        cropped_point = self.point_cloud[obj.x - obj.h:obj.x + obj.h, obj.y - obj.w:obj.y + obj.w]
        obj.cropped_cloud = self.cvbridge.cv2_to_imgmsg(cropped_point, encoding='passthrough')

        point_x = min(max(0, int(obj.x - 0.5 * obj.h)), 240)
        if self.params['use_loc']:
            pose_wrt_robot = self.get_pos_wrt_robot(point_x, obj.y, scan_len=obj.h, scan='point')
            if (pose_wrt_robot == 0).all():
                continue
            if pose_wrt_robot[0] > 8.0:  # drop detections farther than 8 m
                continue
            # Object position in the robot, map, and odometry frames.
            obj.pose_wrt_robot.position.x = pose_wrt_robot[0]
            obj.pose_wrt_robot.position.y = pose_wrt_robot[1]
            obj.pose_wrt_robot.position.z = pose_wrt_robot[2]
            pose_wrt_map = self.get_loc(pose_wrt_robot)[0]
            obj.pose_wrt_map.position.x = pose_wrt_map[0]
            obj.pose_wrt_map.position.y = pose_wrt_map[1]
            obj.pose_wrt_map.position.z = pose_wrt_map[2]
            pose_wrt_odom = self.get_loc(pose_wrt_robot, target='odom')[0]
            obj.pose_wrt_odom.position.x = pose_wrt_odom[0]
            obj.pose_wrt_odom.position.y = pose_wrt_odom[1]
            obj.pose_wrt_odom.position.z = pose_wrt_odom[2]
            obj.valid_pose = 1
        temp.append(obj)

    objarray.objects = temp
    objarray.scene_rgb = msg
    objarray.scene_cloud = self.cvbridge.cv2_to_imgmsg(self.point_cloud, 'passthrough')
    if self.params['show_od']:
        cv2.imshow('Object_detector', cv2.resize(img_display, (640, 480)))
    self.obj_pub.publish(objarray)
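# Consumer sketch (hypothetical listener node; assumes objs_array is
# importable from this package's msg module and that params['obj_topic']
# matches the publisher above):
#
#   import rospy
#   def on_objs(msg):
#       for o in msg.objects:
#           print('{} ({:.2f})'.format(o.class_string, o.confidence))
#   rospy.init_node('dip_obj_listener')
#   rospy.Subscriber(params['obj_topic'], objs_array, on_objs, queue_size=1)
#   rospy.spin()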