def process_frame(self, changed_object_id=None):
    """Refresh the cached event, pose and image observations.

    Takes ``self.env.last_event``, stores it on ``self.event`` and derives
    from it:

    * ``self.pose``      -- result of ``game_util.get_pose`` on the event,
    * ``self.s_t_orig``  -- the event's frame, untouched,
    * ``self.s_t``       -- the frame resized to the configured screen size,
    * ``self.s_t_depth`` -- the depth frame resized to the same size.

    Args:
        changed_object_id: Accepted but not used by this implementation.
    """
    latest_event = self.env.last_event
    self.event = latest_event
    self.pose = game_util.get_pose(latest_event)

    # Keep the raw frame around; the resized copies below are derived from it.
    self.s_t_orig = latest_event.frame

    # Both the colour and the depth image are resized to the same
    # (height, width) target without rescaling.
    target_shape = (constants.SCREEN_HEIGHT, constants.SCREEN_WIDTH)
    self.s_t = game_util.imresize(
        latest_event.frame, target_shape, rescale=False)
    self.s_t_depth = game_util.imresize(
        latest_event.depth_frame, target_shape, rescale=False)
def process_frame(self, run_object_detection=False):
    """Advance the frame counter and refresh the cached agent pose.

    As written, the method increments ``self.im_count``, recomputes
    ``self.pose`` from ``self.event`` and then returns.

    NOTE(review): the unconditional ``return`` right after the pose update
    makes everything below it unreachable (frame resizing, depth, object
    detection, map-memory update and drawing). It looks like a debugging
    leftover, but that cannot be confirmed from this method alone, so the
    behaviour is preserved. Remove the ``return`` to re-enable the pipeline.

    Args:
        run_object_detection: Only consulted by the currently unreachable
            detection code, where a true value steers away from the
            ``self.object_detector`` branch.
    """
    self.im_count += 1
    self.pose = game_util.get_pose(self.event)

    # NOTE(review): early exit -- see the docstring. All code below is dead
    # until this statement is removed.
    return

    frame = self.event.frame
    self.s_t_orig = frame
    self.s_t = game_util.imresize(
        frame,
        (constants.SCREEN_HEIGHT, constants.SCREEN_WIDTH),
        rescale=False)

    if constants.DRAWING:
        self.detection_image = self.s_t_orig.copy()

    # Depth: either predicted from the resized frame or taken from the
    # rendered depth frame. Row 0 of self.times accumulates
    # (total seconds, call count) for depth prediction.
    if constants.PREDICT_DEPTH:
        print("in predict depth")
        t_start = time.time()
        self.s_t_depth = self.depth_estimator.get_depth(self.s_t)
        self.times[0, 0] += time.time() - t_start
        self.times[0, 1] += 1
        if self.times[0, 1] % 100 == 0:
            print('depth time %.3f' % (self.times[0, 0] / self.times[0, 1]))
    elif constants.RENDER_DEPTH_IMAGE:
        self.s_t_depth = game_util.imresize(
            self.event.depth_frame,
            (constants.SCREEN_HEIGHT, constants.SCREEN_WIDTH),
            rescale=False)

    # NOTE(review): `and` binds tighter than `or`, so
    # `not run_object_detection` only qualifies the END_TO_END_BASELINE
    # term. The grouping below spells out that existing precedence; confirm
    # it is the intended one.
    detection_enabled = (
        constants.GT_OBJECT_DETECTION
        or constants.OBJECT_DETECTION
        or (constants.END_TO_END_BASELINE
            and constants.USE_OBJECT_DETECTION_AS_INPUT
            and not run_object_detection))
    if not detection_enabled:
        return

    if constants.OBJECT_DETECTION and not run_object_detection:
        # Get detections. Row 1 of self.times accumulates
        # (total seconds, call count) for the detector.
        t_start = time.time()
        boxes, scores, class_names = self.object_detector.detect(
            game_util.imresize(frame, (608, 608), rescale=False))
        self.times[1, 0] += time.time() - t_start
        self.times[1, 1] += 1
        if self.times[1, 1] % 100 == 0:
            print('detection time %.3f' % (self.times[1, 0] / self.times[1, 1]))

        # One float mask per known class: detection scores are summed
        # inside each box and capped at 1 afterwards.
        mask_dict = {}
        kept_inds = []
        for det_ind, (box, det_score, det_class) in enumerate(
                zip(boxes, scores, class_names)):
            if det_class not in constants.OBJECT_CLASS_TO_ID:
                continue
            if det_class not in mask_dict:
                mask_dict[det_class] = np.zeros(
                    (constants.SCREEN_HEIGHT, constants.SCREEN_WIDTH),
                    dtype=np.float32)
            mask_dict[det_class][box[1]:box[3] + 1,
                                 box[0]:box[2] + 1] += det_score
            kept_inds.append(det_ind)
        mask_dict = {name: np.minimum(class_mask, 1)
                     for name, class_mask in mask_dict.items()}

        # Drop detections of unknown classes.
        kept_inds = np.array(kept_inds)
        if len(kept_inds) > 0:
            boxes = boxes[kept_inds]
            scores = scores[kept_inds]
            class_names = class_names[kept_inds]
        else:
            boxes = np.zeros((0, 4))
            scores = np.zeros(0)
            class_names = np.zeros(0)
        masks = [mask_dict[det_class] for det_class in class_names]

        if constants.END_TO_END_BASELINE:
            self.detection_mask_image = np.zeros(
                (constants.SCREEN_HEIGHT, constants.SCREEN_WIDTH,
                 len(constants.OBJECTS)),
                dtype=np.float32)
            for obj_class in constants.OBJECTS:
                if obj_class in mask_dict:
                    self.detection_mask_image[
                        :, :, constants.OBJECT_CLASS_TO_ID[obj_class]
                    ] = mask_dict[obj_class]
    else:
        # Use the per-class masks carried by the event, each with score 1.
        scores = []
        class_names = []
        masks = []
        for name, class_mask in self.event.class_masks.items():
            if name in constants.OBJECT_CLASS_TO_ID and len(class_mask) > 0:
                scores.append(1)
                class_names.append(name)
                masks.append(class_mask)

        if constants.END_TO_END_BASELINE:
            self.detection_mask_image = np.zeros(
                (constants.SCREEN_HEIGHT, constants.SCREEN_WIDTH,
                 constants.NUM_CLASSES),
                dtype=np.uint8)
            for obj_class in constants.OBJECTS:
                if obj_class not in self.event.class_detections2D:
                    continue
                for box in self.event.class_detections2D[obj_class]:
                    self.detection_mask_image[
                        box[1]:box[3] + 1,
                        box[0]:box[2] + 1,
                        constants.OBJECT_CLASS_TO_ID[obj_class]] = 1

    if constants.RENDER_DEPTH_IMAGE or constants.PREDICT_DEPTH:
        xzy = game_util.depth_to_world_coordinates(
            self.s_t_depth, self.pose,
            self.camera_height / constants.AGENT_STEP_SIZE)
        max_depth_mask = self.s_t_depth >= constants.MAX_DEPTH

        for mask_ind, mask in enumerate(masks):
            # Memory channel for this class is its class id shifted by one.
            channel = constants.OBJECT_CLASS_TO_ID[class_names[mask_ind]] + 1

            # Pixels covered by the mask, excluding those at/over MAX_DEPTH.
            mask_locs = (mask > 0)
            within_depth = np.logical_not(max_depth_mask[mask_locs])
            locations = xzy[mask_locs, :2][within_depth]
            score = mask[mask_locs][within_depth]

            # Round to integer cells, shift so bounds[0:2] is the origin and
            # clamp into [0, bounds[2]) x [0, bounds[3])
            # (original comment: "remove outliers").
            locations = np.round(locations.reshape(-1, 2)).astype(np.int32)
            locations -= np.array(self.bounds)[[0, 1]]
            locations[:, 0] = np.clip(locations[:, 0], 0, self.bounds[2] - 1)
            locations[:, 1] = np.clip(locations[:, 1], 0, self.bounds[3] - 1)
            locations, unique_inds = game_util.unique_rows(
                locations, return_index=True)
            score = score[unique_inds]

            # Blend with the stored score: average where a partial score
            # (strictly between 0 and 1) exists, overwrite where it is 0.
            curr_score = self.graph.memory[
                locations[:, 1], locations[:, 0], channel]
            avg_locs = np.logical_and(curr_score > 0, curr_score < 1)
            curr_score[avg_locs] = (curr_score[avg_locs] * .5
                                    + score[avg_locs] * .5)
            curr_score[curr_score == 0] = score[curr_score == 0]
            self.graph.memory[
                locations[:, 1], locations[:, 0], channel] = curr_score

            # inverse marked as empty: cells seen outside the mask have any
            # partial score for this class decayed by a factor of 0.8.
            outside_locs = np.logical_not(mask_locs)
            locations = xzy[outside_locs, :2]
            within_depth = np.logical_not(max_depth_mask[outside_locs])
            locations = locations[within_depth]
            locations = np.round(locations.reshape(-1, 2)).astype(np.int32)
            # Here the clamp is applied before the shift to bounds-relative
            # indices.
            locations[:, 0] = np.clip(
                locations[:, 0],
                self.bounds[0], self.bounds[0] + self.bounds[2] - 1)
            locations[:, 1] = np.clip(
                locations[:, 1],
                self.bounds[1], self.bounds[1] + self.bounds[3] - 1)
            locations = game_util.unique_rows(locations)
            locations -= np.array(self.bounds)[[0, 1]]

            curr_score = self.graph.memory[
                locations[:, 1], locations[:, 0], channel]
            replace_locs = np.logical_and(curr_score > 0, curr_score < 1)
            curr_score[replace_locs] = curr_score[replace_locs] * .8
            self.graph.memory[
                locations[:, 1], locations[:, 0], channel] = curr_score

    if constants.DRAWING:
        if constants.GT_OBJECT_DETECTION:
            # Rebuild the box list from the event's 2D detections, one
            # entry per box with score 1.
            boxes = []
            scores = []
            class_names = []
            for name, class_boxes in self.event.class_detections2D.items():
                if name in constants.OBJECT_CLASS_TO_ID and len(class_boxes) > 0:
                    boxes.extend(class_boxes)
                    scores.extend([1] * len(class_boxes))
                    class_names.extend([name] * len(class_boxes))
            boxes = np.array(boxes)
            scores = np.array(scores)
        # NOTE(review): `boxes` is only bound by the detector branch or the
        # GT block above; the event-mask branch without GT_OBJECT_DETECTION
        # would reach this call with `boxes` undefined.
        self.detection_image = detector.visualize_detections(
            self.event.frame, boxes, class_names, scores)