def display_verbal_guidance(text):
    img_display = np.ones((200, 400, 3), dtype=np.uint8) * 100
    lines = text.split('.')
    y_pos = 30
    for line in lines:
        cv2.putText(img_display, line.strip(), (10, y_pos),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
        y_pos += 50
    zc.check_and_display('text_guidance', img_display, display_list,
                         resize_max=config.DISPLAY_MAX_PIXEL,
                         wait_time=config.DISPLAY_WAIT_TIME)
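# A minimal usage sketch (hypothetical call; assumes a 'text_guidance' entry
# in display_list and an available display):
#
#   display_verbal_guidance("Pick up the cooker body. Put the base on it.")
#
# Each sentence is split on '.' and drawn on its own row of the gray canvas,
# 50 pixels apart, so only the first few sentences fit in the 200-pixel-tall
# window.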
def process(img, resize_ratio=1, display_list=[]):
    img_object, result = detect_object(img, resize_ratio)
    zc.check_and_display('object', img_object, display_list,
                         wait_time=config.DISPLAY_WAIT_TIME,
                         resize_max=config.DISPLAY_MAX_PIXEL)
    rtn_msg = {'status': 'success'}
    return (rtn_msg, json.dumps(result.tolist()))
def handle(self, header, data):
    # Receive data from control VM
    LOG.info("received new image")
    header['status'] = "nothing"
    result = {}

    # Preprocessing of input image
    img = zc.raw2cv_image(data, gray_scale=True)
    img_with_color = zc.raw2cv_image(data)
    # NOTE: cv2.resize expects dsize as (width, height); check that
    # (IM_HEIGHT, IM_WIDTH) is really the intended order here
    img_with_color = cv2.resize(img_with_color,
                                (config.IM_HEIGHT, config.IM_WIDTH))
    b_channel, g_channel, r_channel = cv2.split(img_with_color)
    alpha_channel = np.ones(b_channel.shape, dtype=b_channel.dtype) * 50
    img_RGBA = cv2.merge((b_channel, g_channel, r_channel, alpha_channel))
    zc.check_and_display('input', img, display_list,
                         resize_max=config.DISPLAY_MAX_PIXEL,
                         wait_time=config.DISPLAY_WAIT_TIME)

    # Get image match
    match = self.matcher.match(img)

    # Send annotation data to mobile client
    if match['status'] != 'success':
        return json.dumps(result)

    header['status'] = 'success'
    img_RGBA = cv2.resize(img_RGBA, (320, 240))
    result['annotated_img'] = b64encode(zc.cv_image2raw(img_RGBA))
    if match['key'] is not None:
        if match.get('annotated_text', None) is not None:
            result['annotated_text'] = match['annotated_text']
        if match.get('annotation_img', None) is not None:
            annotation_img = match['annotation_img']
            annotation_img = cv2.resize(annotation_img, (320, 240))
            annotated_img = cv2.addWeighted(img_RGBA, 1, annotation_img,
                                            1, 0)
            result['annotated_img'] = b64encode(
                zc.cv_image2raw(annotated_img))
    else:
        result['annotated_text'] = "No match found"
    header[gabriel.Protocol_measurement.
           JSON_KEY_APP_SYMBOLIC_TIME] = time.time()
    return json.dumps(result)
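# Note on the annotation path above: the camera frame is rebuilt as a
# 4-channel BGRA image with a constant alpha of 50 (mostly transparent), and
# cv2.addWeighted then sums it channel-wise with the annotation image (which
# must therefore also be 4-channel). The low alpha keeps the live frame faint
# when the client composites the layers, so the annotation stands out.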
def handle(self, header, data):
    #LOG.info("received new image")
    result = {'status': "nothing"}  # default

    ## preprocessing of input image
    img = zc.raw2cv_image(data)
    if max(img.shape) > config.IMAGE_MAX_WH:
        resize_ratio = float(config.IMAGE_MAX_WH) / max(img.shape[0],
                                                        img.shape[1])
        img = cv2.resize(img, (0, 0), fx=resize_ratio, fy=resize_ratio,
                         interpolation=cv2.INTER_AREA)
    zc.check_and_display('input', img, display_list,
                         resize_max=config.DISPLAY_MAX_PIXEL,
                         wait_time=config.DISPLAY_WAIT_TIME)

    ## process the image
    rtn_msg, objects = pc.process(img, display_list)

    ## for measurement, when the symbolic representation has been obtained
    if gabriel.Debug.TIME_MEASUREMENT:
        result[gabriel.Protocol_measurement.
               JSON_KEY_APP_SYMBOLIC_TIME] = time.time()

    if rtn_msg['status'] == 'success':
        result['status'] = 'success'
        cue, CO_balls, pocket = objects
        result['speech'] = pc.get_guidance(img, cue, CO_balls, pocket,
                                           display_list)

    header['status'] = result.pop('status')
    header[gabriel.Protocol_measurement.
           JSON_KEY_APP_SYMBOLIC_TIME] = result.pop(
        gabriel.Protocol_measurement.JSON_KEY_APP_SYMBOLIC_TIME, -1)
    return json.dumps(result)
def _handle_input_data(self):
    if self.engine_id == "LEGO_SLOW":
        import lego_cv as lc
    else:
        import lego_cv_fast as lc

    # receive data from control VM
    header_size = struct.unpack("!I", self._recv_all(4))[0]
    data_size = struct.unpack("!I", self._recv_all(4))[0]
    header_str = self._recv_all(header_size)
    image_data = self._recv_all(data_size)
    #header = json.loads(header_str)

    ## symbolic representation extraction
    img = zc.raw2cv_image(image_data)
    stretch_ratio = float(16) / 9 * img.shape[0] / img.shape[1]
    if img.shape != (config.IMAGE_WIDTH, config.IMAGE_HEIGHT, 3):
        img = cv2.resize(img, (config.IMAGE_WIDTH, config.IMAGE_HEIGHT),
                         interpolation=cv2.INTER_AREA)
    zc.check_and_display('input', img, display_list,
                         wait_time=config.DISPLAY_WAIT_TIME,
                         resize_max=config.DISPLAY_MAX_PIXEL)

    # get bitmap for current image
    rtn_msg, bitmap = lc.process(img, stretch_ratio, display_list)
    if rtn_msg['status'] != 'success':
        print rtn_msg['message']
        result_str = "None"
    else:
        result_str = json.dumps(bitmap.tolist())

    packet = struct.pack("!I%dsI%ds" % (len(header_str), len(result_str)),
                         len(header_str), header_str,
                         len(result_str), result_str)
    self.sock.sendall(packet)
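# Sketch of the matching receive side for the length-prefixed reply built
# above (illustrative only; `recv_all` is any helper with the semantics of
# self._recv_all, i.e. it blocks until exactly n bytes have been read):
def _demo_parse_reply(recv_all):
    import json
    import struct
    header_size = struct.unpack("!I", recv_all(4))[0]   # uint32 header length
    header_str = recv_all(header_size)
    result_size = struct.unpack("!I", recv_all(4))[0]   # uint32 result length
    result_str = recv_all(result_size)
    # "None" marks a failed reconstruction; otherwise the payload is the
    # JSON-encoded bitmap produced by lc.process
    bitmap = None if result_str == "None" else json.loads(result_str)
    return header_str, bitmap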
def _handle_img(self, img):
    if self.is_first_frame and not config.RECOGNIZE_ONLY:
        # do something special when the task begins
        result, img_guidance = self.task.get_first_guidance()
        zc.check_and_display('guidance', img_guidance, display_list,
                             wait_time=config.DISPLAY_WAIT_TIME,
                             resize_max=config.DISPLAY_MAX_PIXEL)
        self.is_first_frame = False
        result['state_index'] = 0  # first step
        return json.dumps(result)

    result = {'status': "nothing"}  # default

    stretch_ratio = float(16) / 9 * img.shape[0] / img.shape[1]
    if img.shape != (config.IMAGE_WIDTH, config.IMAGE_HEIGHT, 3):
        img = cv2.resize(img, (config.IMAGE_WIDTH, config.IMAGE_HEIGHT),
                         interpolation=cv2.INTER_AREA)

    ## get bitmap for current image
    zc.check_and_display('input', img, display_list,
                         wait_time=config.DISPLAY_WAIT_TIME,
                         resize_max=config.DISPLAY_MAX_PIXEL)
    rtn_msg, bitmap = lc.process(img, stretch_ratio, display_list)
    if rtn_msg['status'] != 'success':
        print rtn_msg['message']
        if rtn_msg['message'] == ("Not confident about reconstruction, "
                                  "maybe too much noise"):
            self.counter['not_confident'] += 1
        return json.dumps(result)
    self.counter['confident'] += 1

    ## try to commit bitmap
    state_change = False
    if bm.bitmap_same(self.commited_bitmap, bitmap):
        pass
    else:
        current_time = time.time()
        if not bm.bitmap_same(self.temp_bitmap['bitmap'], bitmap):
            self.temp_bitmap['bitmap'] = bitmap
            self.temp_bitmap['first_time'] = current_time
            self.temp_bitmap['count'] = 0
            self.counter['diff_from_prev'] += 1
        else:
            self.counter['same_as_prev'] += 1
        self.temp_bitmap['count'] += 1
        if (current_time - self.temp_bitmap['first_time'] >
                config.BM_WINDOW_MIN_TIME or
                self.temp_bitmap['count'] >= config.BM_WINDOW_MIN_COUNT):
            self.commited_bitmap = self.temp_bitmap['bitmap']
            state_change = True
    #print "\n\n\n\n\n%s\n\n\n\n\n" % self.counter

    bitmap = self.commited_bitmap
    if 'lego_syn' in display_list and bitmap is not None:
        img_syn = bm.bitmap2syn_img(bitmap)
        zc.display_image('lego_syn', img_syn,
                         wait_time=config.DISPLAY_WAIT_TIME,
                         resize_scale=50)

    if config.RECOGNIZE_ONLY:
        return json.dumps(result)

    ## now user has done something, provide some feedback
    img_guidance = None
    if state_change:
        self.task.update_state(bitmap)
        result, img_guidance = self.task.get_guidance()

        if self.task.is_final_state():
            step_idx = len(self.task.states)
        else:
            # get current step
            step_idx = self.task.state2idx(self.task.current_state)
        # make sure the step index is -1 in case of error; also,
        # differentiate the default initial step (assigned index 0) from
        # the internal task step index (which also begins at 0) by
        # shifting the latter by 1:
        step_idx = -1 if step_idx < 0 else step_idx + 1
        result['state_index'] = step_idx

    if img_guidance is not None:
        zc.check_and_display('guidance', img_guidance, display_list,
                             wait_time=config.DISPLAY_WAIT_TIME,
                             resize_max=config.DISPLAY_MAX_PIXEL)

    return json.dumps(result)
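# The commit window above debounces the noisy per-frame board state: a newly
# seen bitmap replaces the committed one only after it has persisted for
# config.BM_WINDOW_MIN_TIME seconds or been observed
# config.BM_WINDOW_MIN_COUNT times in a row. A standalone sketch of the same
# idea (hypothetical names; candidates compared by plain equality instead of
# bm.bitmap_same, no dependency on config):
def _demo_commit_window(committed, candidate, temp, now,
                        min_time=1.0, min_count=3):
    # `temp` tracks the current candidate: {'bitmap', 'first_time', 'count'}
    if candidate == committed:
        return committed, False
    if candidate != temp['bitmap']:
        # a brand-new candidate restarts the window
        temp['bitmap'] = candidate
        temp['first_time'] = now
        temp['count'] = 0
    temp['count'] += 1
    if now - temp['first_time'] > min_time or temp['count'] >= min_count:
        return temp['bitmap'], True   # commit and report a state change
    return committed, False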
def handle(self, header, data):
    LOG.info("received new image")
    header['status'] = "nothing"
    result = {}  # default

    ## first image
    if self.is_first_image:
        self.is_first_image = False
        instruction = self.task.get_instruction(np.array([]))
        header['status'] = 'success'

        if instruction.get('speech', None) is not None:
            result['speech'] = instruction['speech']
            display_verbal_guidance(result['speech'])
            if config.PLAY_SOUND:
                data = result['speech']
                packet = struct.pack("!I%ds" % len(data), len(data), data)
                self.sound_sock.sendall(packet)
        if instruction.get('image', None) is not None:
            feedback_image = b64encode(
                zc.cv_image2raw(instruction['image']))
            result['image'] = feedback_image
            zc.check_and_display('img_guidance', instruction['image'],
                                 display_list,
                                 resize_max=config.DISPLAY_MAX_PIXEL,
                                 wait_time=config.DISPLAY_WAIT_TIME)
        return json.dumps(result)

    ## preprocessing of input image
    img = zc.raw2cv_image(data)
    if header.get('holo_capture', None) is not None:
        zc.check_and_display('holo', img, display_list,
                             resize_max=config.DISPLAY_MAX_PIXEL,
                             wait_time=config.DISPLAY_WAIT_TIME)
        return json.dumps(result)
    zc.check_and_display('input', img, display_list,
                         resize_max=config.DISPLAY_MAX_PIXEL,
                         wait_time=config.DISPLAY_WAIT_TIME)

    ## resize image to configured maximum size
    resize_ratio = 1
    if max(img.shape) > config.IMAGE_MAX_WH:
        resize_ratio = float(config.IMAGE_MAX_WH) / max(img.shape[0],
                                                        img.shape[1])
        img = cv2.resize(img, (0, 0), fx=resize_ratio, fy=resize_ratio,
                         interpolation=cv2.INTER_AREA)
    zc.check_and_display('input', img, display_list,
                         resize_max=config.DISPLAY_MAX_PIXEL,
                         wait_time=config.DISPLAY_WAIT_TIME)

    ## get current state
    t = time.time()
    rtn_msg, objects_data = cc.process(img, resize_ratio, display_list)
    print time.time() - t

    ## for measurement, when the symbolic representation has been obtained
    if gabriel.Debug.TIME_MEASUREMENT:
        result[gabriel.Protocol_measurement.
               JSON_KEY_APP_SYMBOLIC_TIME] = time.time()

    # the object detection result format is, for each line:
    # [x1, y1, x2, y2, confidence, cls_idx]
    objects = np.array(json.loads(objects_data))
    objects = reorder_objects(objects)
    if "object" in display_list:
        img_object = zc.vis_detections(img, objects, config.LABELS)
        zc.check_and_display("object", img_object, display_list,
                             resize_max=config.DISPLAY_MAX_PIXEL,
                             wait_time=config.DISPLAY_WAIT_TIME)
    LOG.info("object detection result: %s" % objects)

    ## for measurement, when the symbolic representation has been obtained
    if gabriel.Debug.TIME_MEASUREMENT:
        header[gabriel.Protocol_measurement.
               JSON_KEY_APP_SYMBOLIC_TIME] = time.time()

    ## get instruction based on state
    instruction = self.task.get_instruction(objects)
    if instruction['status'] != 'success':
        return json.dumps(result)

    header['status'] = 'success'
    if instruction.get('speech', None) is not None:
        result['speech'] = instruction['speech']
        display_verbal_guidance(result['speech'])
        if config.PLAY_SOUND:
            data = result['speech']
            packet = struct.pack("!I%ds" % len(data), len(data), data)
            self.sound_sock.sendall(packet)
    if instruction.get('image', None) is not None:
        feedback_image = b64encode(zc.cv_image2raw(instruction['image']))
        result['image'] = feedback_image
        zc.check_and_display('img_guidance', instruction['image'],
                             display_list,
                             resize_max=config.DISPLAY_MAX_PIXEL,
                             wait_time=config.DISPLAY_WAIT_TIME)
    if instruction.get('holo_object', None) is not None:
        result['holo_object'] = instruction['holo_object']
    if instruction.get('holo_location', None) is not None:
        result['holo_location'] = instruction['holo_location']
    return json.dumps(result)
img_prev = cv2.imread(prev_file)

## preprocessing of input images
if max(img.shape) != config.IMAGE_MAX_WH:
    resize_ratio = float(config.IMAGE_MAX_WH) / max(img.shape)
    img = cv2.resize(img, (0, 0), fx=resize_ratio, fy=resize_ratio,
                     interpolation=cv2.INTER_AREA)
    img_prev = cv2.resize(img_prev, (0, 0), fx=resize_ratio,
                          fy=resize_ratio, interpolation=cv2.INTER_AREA)
zc.check_and_display('input', img, display_list,
                     resize_max=config.DISPLAY_MAX_PIXEL,
                     wait_time=config.DISPLAY_WAIT_TIME)

####################### Start ###############################
#pc.check_image(img, display_list)
rtn_msg, objects = pc.find_table(img, display_list)
if objects is not None:
    img_rotated, mask_table, M = objects
print rtn_msg
if rtn_msg['status'] == 'success':
    rtn_msg, objects = pc.find_table(img_prev, display_list)
    if objects is not None:
        img_prev_rotated, mask_table_prev, M = objects
    print rtn_msg
    if rtn_msg['status'] == 'success':
def find_table(img, display_list):
    ## find white border
    DoB = zc.get_DoB(img, 1, 31, method='Average')
    zc.check_and_display('DoB', DoB, display_list,
                         resize_max=config.DISPLAY_MAX_PIXEL,
                         wait_time=config.DISPLAY_WAIT_TIME)
    mask_white = zc.color_inrange(DoB, 'HSV', V_L=10)
    zc.check_and_display_mask('mask_white_raw', img, mask_white,
                              display_list,
                              resize_max=config.DISPLAY_MAX_PIXEL,
                              wait_time=config.DISPLAY_WAIT_TIME)

    ## find purple table (roughly)
    #mask_table = zc.color_inrange(img, 'HSV', H_L=130, H_U=160, S_L=50,
    #                              V_L=50, V_U=220)
    mask_ground = zc.color_inrange(img, 'HSV', H_L=18, H_U=30, S_L=75,
                                   S_U=150, V_L=100, V_U=255)
    zc.check_and_display_mask('ground', img, mask_ground, display_list,
                              resize_max=config.DISPLAY_MAX_PIXEL,
                              wait_time=config.DISPLAY_WAIT_TIME)
    # red car
    mask_red = zc.color_inrange(img, 'HSV', H_L=170, H_U=10, S_L=150)
    mask_ground = np.bitwise_or(mask_ground, mask_red)
    # ceiling
    mask_ceiling = np.zeros((360, 640), dtype=np.uint8)
    mask_ceiling[:40, :] = 255
    mask_ground = np.bitwise_or(mask_ground, mask_ceiling)
    # find the screen
    mask_screen1 = zc.color_inrange(img, 'HSV', H_L=15, H_U=45, S_L=30,
                                    S_U=150, V_L=40, V_U=150)
    mask_screen2 = ((img[:, :, 2] - 5) > img[:, :, 0]).astype(
        np.uint8) * 255
    mask_screen = np.bitwise_or(mask_screen1, mask_screen2)
    mask_screen = np.bitwise_and(np.bitwise_not(mask_ground), mask_screen)
    mask_screen = zc.shrink(mask_screen, 5, iterations=3)
    zc.check_and_display_mask('screen_raw', img, mask_screen, display_list,
                              resize_max=config.DISPLAY_MAX_PIXEL,
                              wait_time=config.DISPLAY_WAIT_TIME)
    bool_screen = zc.mask2bool([mask_screen])[0]
    # NOTE: int8 wraps for pixel values above 127
    c_pixels = img[bool_screen].astype(np.int8)
    d_pixels = c_pixels[:, 2] - c_pixels[:, 0]
    rb_diff = np.median(d_pixels)
    print rb_diff
    if rb_diff > 20:
        print "Case 1"
        mask_table1 = zc.color_inrange(img, 'HSV', H_L=0, H_U=115, S_U=45,
                                       V_L=35, V_U=120)
        mask_table2 = zc.color_inrange(img, 'HSV', H_L=72, H_U=120,
                                       S_L=20, S_U=60, V_L=35, V_U=150)
        mask_table = np.bitwise_or(mask_table1, mask_table2)
        mask_screen = zc.color_inrange(img, 'HSV', H_L=15, H_U=45, S_L=60,
                                       S_U=150, V_L=40, V_U=150)
    elif rb_diff > 15:
        print "Case 2"
        mask_table1 = zc.color_inrange(img, 'HSV', H_L=0, H_U=115, S_U=45,
                                       V_L=35, V_U=120)
        mask_table2 = zc.color_inrange(img, 'HSV', H_L=72, H_U=120,
                                       S_L=20, S_U=60, V_L=35, V_U=150)
        mask_table = np.bitwise_or(mask_table1, mask_table2)
        mask_screen = zc.color_inrange(img, 'HSV', H_L=15, H_U=45, S_L=35,
                                       S_U=150, V_L=40, V_U=150)
    else:
        print "Case 3"
        mask_table1 = zc.color_inrange(img, 'HSV', H_L=0, H_U=115, S_U=20,
                                       V_L=35, V_U=115)
        mask_table2 = zc.color_inrange(img, 'HSV', H_L=72, H_U=120,
                                       S_L=20, S_U=60, V_L=35, V_U=150)
        mask_table = np.bitwise_or(mask_table1, mask_table2)
        mask_screen1 = zc.color_inrange(img, 'HSV', H_L=15, H_U=45,
                                        S_L=30, S_U=150, V_L=40, V_U=150)
        mask_screen2 = ((img[:, :, 2] - 1) > img[:, :, 0]).astype(
            np.uint8) * 255
        mask_screen = np.bitwise_or(mask_screen1, mask_screen2)
        mask_screen = np.bitwise_and(np.bitwise_not(mask_ground),
                                     mask_screen)
    zc.check_and_display_mask('screen', img, mask_screen, display_list,
                              resize_max=config.DISPLAY_MAX_PIXEL,
                              wait_time=config.DISPLAY_WAIT_TIME)
    mask_table = np.bitwise_and(np.bitwise_not(mask_screen), mask_table)
    mask_table = np.bitwise_and(np.bitwise_not(zc.shrink(mask_ground, 3)),
                                mask_table)
    mask_table, _ = zc.get_big_blobs(mask_table, min_area=50)
    mask_table = cv2.morphologyEx(mask_table, cv2.MORPH_CLOSE,
                                  zc.generate_kernel(7, 'circular'),
                                  iterations=1)
    #mask_table, _ = zc.find_largest_CC(mask_table)
    zc.check_and_display_mask('table_purple_raw', img, mask_table,
                              display_list,
                              resize_max=config.DISPLAY_MAX_PIXEL,
                              wait_time=config.DISPLAY_WAIT_TIME)
    if mask_table is None:
        rtn_msg = {'status': 'fail', 'message': 'Cannot find table'}
        return (rtn_msg, None)
    #mask_table_convex, _ = zc.make_convex(mask_table.copy(),
    #                                      app_ratio=0.005)
    #mask_table = np.bitwise_or(mask_table, mask_table_convex)
    mask_table_raw = mask_table.copy()
    zc.check_and_display_mask('table_purple', img, mask_table,
                              display_list,
                              resize_max=config.DISPLAY_MAX_PIXEL,
                              wait_time=config.DISPLAY_WAIT_TIME)
    mask_table_convex, _ = zc.make_convex(
        zc.shrink(mask_table, 5, iterations=5), app_ratio=0.01)
    mask_table_shrunk = zc.shrink(mask_table_convex, 5, iterations=3)
    zc.check_and_display_mask('table_shrunk', img, mask_table_shrunk,
                              display_list,
                              resize_max=config.DISPLAY_MAX_PIXEL,
                              wait_time=config.DISPLAY_WAIT_TIME)

    ## fine tune the purple table based on white border
    mask_white = np.bitwise_and(np.bitwise_not(mask_table_shrunk),
                                mask_white)
    if 'mask_white' in display_list:
        gray = np.float32(mask_white)
        dst = cv2.cornerHarris(gray, 10, 3, 0.04)
        dst = cv2.dilate(dst, None)
        img_white = img.copy()
        img_white[mask_white > 0, :] = [0, 255, 0]
        img_white[dst > 2.4e7] = [0, 0, 255]
        zc.check_and_display('mask_white', img_white, display_list,
                             resize_max=config.DISPLAY_MAX_PIXEL,
                             wait_time=config.DISPLAY_WAIT_TIME)
    #mask_table, _ = zc.make_convex(mask_table, app_ratio=0.005)
    for i in xrange(15):
        mask_table = zc.expand(mask_table, 3)
        mask_table = np.bitwise_and(np.bitwise_not(mask_white), mask_table)
        mask_table, _ = zc.find_largest_CC(mask_table)
        if mask_table is None:
            rtn_msg = {'status': 'fail',
                       'message': 'Cannot find table, case 2'}
            return (rtn_msg, None)
        if i % 4 == 3:
            mask_table, _ = zc.make_convex(mask_table, app_ratio=0.01)
            #img_display = img.copy()
            #img_display[mask_table > 0, :] = [0, 0, 255]
            #zc.display_image('table%d-b' % i, img_display,
            #                 resize_max=config.DISPLAY_MAX_PIXEL,
            #                 wait_time=config.DISPLAY_WAIT_TIME)
            #mask_white = np.bitwise_and(np.bitwise_not(mask_table),
            #                            mask_white)
    mask_table = np.bitwise_and(np.bitwise_not(mask_white), mask_table)
    mask_table, _ = zc.find_largest_CC(mask_table)
    mask_table, hull_table = zc.make_convex(mask_table, app_ratio=0.01)
    zc.check_and_display_mask('table_purple_fixed', img, mask_table,
                              display_list,
                              resize_max=config.DISPLAY_MAX_PIXEL,
                              wait_time=config.DISPLAY_WAIT_TIME)

    ## check if table is big enough
    table_area = cv2.contourArea(hull_table)
    table_area_percentage = float(table_area) / img.shape[0] / img.shape[1]
    if table_area_percentage < 0.06:
        rtn_msg = {'status': 'fail',
                   'message': "Detected table too small: %f"
                              % table_area_percentage}
        return (rtn_msg, None)

    ## find top line of table
    hull_table = np.array(zc.sort_pts(hull_table[:, 0, :],
                                      order_first='y'))
    ul = hull_table[0]
    ur = hull_table[1]
    if ul[0] > ur[0]:
        t = ul
        ul = ur
        ur = t
    # the top two points in the hull are probably on the top line,
    # but may not be the corners
    i = 2
    while (i < hull_table.shape[0] and
           hull_table[i, 1] - hull_table[0, 1] < 80):
        pt_tmp = hull_table[i]
        if pt_tmp[0] < ul[0] or pt_tmp[0] > ur[0]:
            # computing the area of the part of the triangle that lies
            # inside the table
            triangle = np.vstack([pt_tmp, ul, ur]).astype(np.int32)
            mask_triangle = np.zeros_like(mask_table)
            cv2.drawContours(mask_triangle, [triangle], 0, 255, -1)
            pts = mask_table_raw[mask_triangle.astype(bool)]
            if np.sum(pts == 255) > 10:
                break
            if pt_tmp[0] < ul[0]:
                ul = pt_tmp
            else:
                ur = pt_tmp
            i += 1
        else:
            break
    ul = [int(x) for x in ul]
    ur = [int(x) for x in ur]
    if 'table' in display_list:
        img_table = img.copy()
        img_table[mask_table.astype(bool), :] = [255, 0, 255]
        #cv2.line(img_table, tuple(ul), tuple(ur), [0, 255, 0], 3)
        zc.check_and_display('table', img_table, display_list,
                             resize_max=config.DISPLAY_MAX_PIXEL,
                             wait_time=config.DISPLAY_WAIT_TIME)

    ## sanity checks about table top line detection
    if zc.euc_dist(ul, ur) ** 2 * 3.1 < table_area:
        rtn_msg = {'status': 'fail',
                   'message': "Table top line too short: %f, %f"
                              % (zc.euc_dist(ul, ur) ** 2 * 3.1,
                                 table_area)}
        return (rtn_msg, None)
    if abs(zc.line_angle(ul, ur)) > 0.4:
        rtn_msg = {'status': 'fail',
                   'message': "Table top line tilted too much"}
        return (rtn_msg, None)
    # check if the two table sides form a reasonable angle
    mask_table_bottom = mask_table.copy()
    mask_table_bottom[:-30] = 0
    p_left_most = zc.get_edge_point(mask_table_bottom, (-1, 0))
    p_right_most = zc.get_edge_point(mask_table_bottom, (1, 0))
    if p_left_most is None or p_right_most is None:
        rtn_msg = {'status': 'fail',
                   'message': "Table doesn't occupy bottom part of image"}
        return (rtn_msg, None)
    left_side_angle = zc.line_angle(ul, p_left_most)
    right_side_angle = zc.line_angle(ur, p_right_most)
    angle_diff = zc.angle_dist(left_side_angle, right_side_angle,
                               angle_range=math.pi * 2)
    if abs(angle_diff) > 2.0:
        rtn_msg = {'status': 'fail',
                   'message': "Angle between two side edges not right: %f"
                              % angle_diff}
        return (rtn_msg, None)

    if 'table' in display_list:
        img_table = img.copy()
        img_table[mask_table.astype(bool), :] = [255, 0, 255]
        cv2.line(img_table, tuple(ul), tuple(ur), [0, 255, 0], 3)
        zc.check_and_display('table', img_table, display_list,
                             resize_max=config.DISPLAY_MAX_PIXEL,
                             wait_time=config.DISPLAY_WAIT_TIME)

    ## rotate to make opponent upright, using table edge as reference
    pts1 = np.float32([ul, ur,
                       [ul[0] + (ur[1] - ul[1]), ul[1] - (ur[0] - ul[0])]])
    pts2 = np.float32([[0, config.O_IMG_HEIGHT],
                       [config.O_IMG_WIDTH, config.O_IMG_HEIGHT],
                       [0, 0]])
    M = cv2.getAffineTransform(pts1, pts2)
    img[np.bitwise_not(zc.get_mask(img, rtn_type="bool", th=3)), :] = \
        [3, 3, 3]
    img_rotated = cv2.warpAffine(img, M,
                                 (config.O_IMG_WIDTH,
                                  config.O_IMG_HEIGHT))

    ## sanity checks about rotated opponent image
    bool_img_rotated_valid = zc.get_mask(img_rotated, rtn_type="bool")
    valid_ratio = (float(bool_img_rotated_valid.sum()) /
                   config.O_IMG_WIDTH / config.O_IMG_HEIGHT)
    if valid_ratio < 0.6:
        rtn_msg = {'status': 'fail',
                   'message': "Valid area too small after rotation: %f"
                              % valid_ratio}
        return (rtn_msg, None)

    rtn_msg = {'status': 'success'}
    return (rtn_msg, (img_rotated, mask_table, M))
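# A note on the affine warp above: the third source point is ul plus the
# table-edge vector (ur - ul) rotated 90 degrees, which in image coordinates
# (y grows downward) points "up" from the table edge. Mapping (ul, ur, third)
# onto the bottom-left, bottom-right and top-left corners of the
# O_IMG_WIDTH x O_IMG_HEIGHT canvas makes the opponent upright, with the
# table edge as the bottom of the frame. For example, with ul = (100, 200)
# and ur = (300, 180), the edge vector is (200, -20), so the third point is
# (100 + (-20), 200 - 200) = (80, 0).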
def find_opponent(img, img_prev, display_list):
    def draw_flow(img, flow, step=16):
        h, w = img.shape[:2]
        y, x = np.mgrid[step / 2:h:step, step / 2:w:step].reshape(2, -1)
        fx, fy = flow[y, x].T
        lines = np.vstack([x, y, x + fx, y + fy]).T.reshape(-1, 2, 2)
        lines = np.int32(lines + 0.5)
        vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        cv2.polylines(vis, lines, 0, (0, 255, 0))
        for (x1, y1), (x2, y2) in lines:
            cv2.circle(vis, (x1, y1), 1, (0, 255, 0), -1)
        return vis

    def draw_rects(img, rects, color):
        for x1, y1, x2, y2 in rects:
            cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)

    #start_time = current_milli_time()

    ## General preparations
    if 'opponent' in display_list:
        img_opponent = img_prev.copy()
    zc.check_and_display('rotated', img, display_list, is_resize=False,
                         wait_time=config.DISPLAY_WAIT_TIME)
    zc.check_and_display('rotated_prev', img_prev, display_list,
                         is_resize=False,
                         wait_time=config.DISPLAY_WAIT_TIME)
    bw = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    bw_prev = cv2.cvtColor(img_prev, cv2.COLOR_BGR2GRAY)

    # valid part of img_prev
    mask_img_prev_valid = zc.get_mask(img_prev, rtn_type="mask")
    bool_img_prev_valid = zc.shrink(mask_img_prev_valid, 15,
                                    iterations=3).astype(bool)
    bool_img_prev_invalid = np.bitwise_not(bool_img_prev_valid)
    mask_white_prev = zc.color_inrange(img_prev, 'HSV', S_U=50, V_L=130)
    bool_white_prev = zc.shrink(mask_white_prev, 13, iterations=3,
                                method='circular').astype(bool)
    # valid part of img
    mask_img_valid = zc.get_mask(img, rtn_type="mask")
    bool_img_valid = zc.shrink(mask_img_valid, 15,
                               iterations=3).astype(bool)
    bool_img_invalid = np.bitwise_not(bool_img_valid)
    mask_white = zc.color_inrange(img, 'HSV', S_U=50, V_L=130)
    bool_white = zc.shrink(mask_white, 13, iterations=3,
                           method='circular').astype(bool)
    # prior score according to height
    row_score, col_score = np.mgrid[0:img.shape[0], 0:img.shape[1]]
    row_score = img.shape[0] * 1.2 - row_score.astype(np.float32)
    #print "time0: %f" % (current_milli_time() - start_time)

    ## method 1: optical flow - dense
    opt_flow = np.zeros((bw.shape[0], bw.shape[1], 2), dtype=np.float32)
    opt_flow[::2, ::2, :] = cv2.calcOpticalFlowFarneback(
        bw_prev[::2, ::2], bw[::2, ::2], pyr_scale=0.5, levels=1,
        winsize=15, iterations=3, poly_n=7, poly_sigma=1.5, flags=0)
    if 'denseflow' in display_list:
        zc.display_image('denseflow', draw_flow(bw, opt_flow, step=16),
                         is_resize=False,
                         wait_time=config.DISPLAY_WAIT_TIME)
    # clean optical flow
    mag_flow = np.sqrt(np.sum(np.square(opt_flow), axis=2))
    bool_flow_valid = mag_flow > 2
    bool_flow_valid = np.bitwise_and(bool_flow_valid, bool_img_prev_valid)
    bool_flow_valid = np.bitwise_and(bool_flow_valid,
                                     np.bitwise_not(bool_white_prev))
    bool_flow_invalid = np.bitwise_not(bool_flow_valid)
    # subtract the flow average from all the flow
    x_ave = np.mean(opt_flow[bool_flow_valid, 0])
    y_ave = np.mean(opt_flow[bool_flow_valid, 1])
    opt_flow[:, :, 0] -= x_ave
    opt_flow[:, :, 1] -= y_ave
    opt_flow[bool_flow_invalid, :] = 0
    if 'denseflow_cleaned' in display_list:
        zc.display_image('denseflow_cleaned',
                         draw_flow(bw, opt_flow, step=16),
                         is_resize=False,
                         wait_time=config.DISPLAY_WAIT_TIME)
    # give the flow a "score"
    score_flow = np.sqrt(np.sum(np.square(opt_flow), axis=2))
    score_flow = score_flow * row_score
    score_horizonal = np.sum(score_flow, axis=0)
    low_pass_h = np.ones(120)
    low_pass_h /= low_pass_h.sum()
    score_horizonal_filtered_dense = np.convolve(score_horizonal,
                                                 low_pass_h, mode='same')
    if 'dense_hist' in display_list:
        plot_bar(score_horizonal_filtered_dense, name='dense_hist')
        print np.argmax(score_horizonal_filtered_dense)
    if 'opponent' in display_list:
        cv2.circle(img_opponent,
                   (np.argmax(score_horizonal_filtered_dense), 220), 20,
                   (0, 255, 0), -1)
    #print "time1: %f" % (current_milli_time() - start_time)
    ## method 2: optical flow - LK
    feature_params = dict(maxCorners=100, qualityLevel=0.03,
                          minDistance=5, blockSize=3)
    lk_params = dict(winSize=(15, 15), maxLevel=2,
                     criteria=(cv2.TERM_CRITERIA_EPS |
                               cv2.TERM_CRITERIA_COUNT, 10, 0.03))
    p0 = cv2.goodFeaturesToTrack(bw_prev, mask=mask_img_prev_valid,
                                 useHarrisDetector=False, **feature_params)
    if p0 is None:
        # TODO: this is also a possible indication that the rally is not on
        rtn_msg = {'status': 'fail',
                   'message': ('No good featuresToTrack at all, '
                               'probably no one in the scene')}
        return (rtn_msg, None)
    p1, st, err = cv2.calcOpticalFlowPyrLK(bw_prev, bw, p0, None,
                                           **lk_params)
    # select good points
    good_new = p1[st == 1]
    good_old = p0[st == 1]
    # draw the tracks
    if 'LKflow' in display_list:
        img_LK = img_prev.copy()
        for i, (new, old) in enumerate(zip(good_new, good_old)):
            a, b = new.ravel()
            c, d = old.ravel()
            cv2.line(img_LK, (a, b), (c, d), (0, 255, 0), 2)
            cv2.circle(img_LK, (c, d), 5, (0, 255, 0), -1)
        zc.display_image('LKflow', img_LK, is_resize=False,
                         wait_time=config.DISPLAY_WAIT_TIME)
    bool_flow_valid = np.bitwise_and(bool_img_valid,
                                     np.bitwise_not(bool_white))
    bool_flow_invalid = np.bitwise_not(bool_flow_valid)
    bool_flow_valid_prev = np.bitwise_and(
        bool_img_prev_valid, np.bitwise_not(bool_white_prev))
    bool_flow_invalid_prev = np.bitwise_not(bool_flow_valid_prev)
    is_reallygood = np.zeros((good_new.shape[0]), dtype=bool)
    for i, (new, old) in enumerate(zip(good_new, good_old)):
        a, b = new.ravel()
        c, d = old.ravel()
        if (bool_flow_invalid_prev[d, c] or
                max(a, b) > config.O_IMG_HEIGHT or min(a, b) < 0 or
                bool_flow_invalid[b, a]):
            continue
        is_reallygood[i] = True
    reallygood_new = good_new[is_reallygood]
    reallygood_old = good_old[is_reallygood]
    motion = reallygood_new - reallygood_old
    motion_real = motion - np.mean(motion, axis=0)
    if 'LKflow_cleaned' in display_list:
        img_LK_cleaned = img_prev.copy()
        img_LK_cleaned[bool_flow_invalid_prev, :] = [0, 0, 255]
        for i, (new, old) in enumerate(zip(reallygood_new,
                                           reallygood_old)):
            c, d = old.ravel()
            cv2.line(img_LK_cleaned, (c, d),
                     (c + motion_real[i, 0], d + motion_real[i, 1]),
                     (0, 255, 0), 2)
            cv2.circle(img_LK_cleaned, (c, d), 5, (0, 255, 0), -1)
        zc.display_image('LKflow_cleaned', img_LK_cleaned,
                         is_resize=False,
                         wait_time=config.DISPLAY_WAIT_TIME)
    score_flow = np.zeros(bw.shape, dtype=np.float32)
    score_flow[reallygood_old[:, 1].astype(np.int),
               reallygood_old[:, 0].astype(np.int)] = np.sqrt(
        np.sum(np.square(motion_real), axis=1))
    score_flow = score_flow * row_score
    score_horizonal = np.sum(score_flow, axis=0)
    low_pass_h = np.ones(120)
    low_pass_h /= low_pass_h.sum()
    score_horizonal_filtered_LK = np.convolve(score_horizonal, low_pass_h,
                                              mode='same')
    if 'LK_hist' in display_list:
        plot_bar(score_horizonal_filtered_LK, name='LK_hist')
        print np.argmax(score_horizonal_filtered_LK)

    # if motion is too small, probably no one is there...
    if np.max(score_horizonal_filtered_LK) < 300:
        # TODO: this is also a possible indication that the rally is not on
        rtn_msg = {'status': 'fail',
                   'message': ('Motion too small, '
                               'probably no one in the scene')}
        return (rtn_msg, None)
    if 'opponent' in display_list:
        cv2.circle(img_opponent,
                   (np.argmax(score_horizonal_filtered_LK), 220), 20,
                   (0, 0, 255), -1)
    #print "time2: %f" % (current_milli_time() - start_time)

    ## method 3: remove white wall
    mask_white = zc.color_inrange(img_prev, 'HSV', S_U=50, V_L=130)
    zc.check_and_display('mask_white_wall', mask_white, display_list,
                         resize_max=config.DISPLAY_MAX_PIXEL,
                         wait_time=config.DISPLAY_WAIT_TIME)
    score = row_score
    score[bool_img_invalid] = 0
    score[bool_white] = 0
    score_horizonal = np.sum(score, axis=0)
    low_pass_h = np.ones(120)
    low_pass_h /= low_pass_h.sum()
    score_horizonal_filtered_wall = np.convolve(score_horizonal,
                                                low_pass_h, mode='same')
    if 'wall_hist' in display_list:
        plot_bar(score_horizonal_filtered_wall, name='wall_hist')
        print np.argmax(score_horizonal_filtered_wall)
    if 'opponent' in display_list:
        cv2.circle(img_opponent,
                   (np.argmax(score_horizonal_filtered_wall), 220), 20,
                   (255, 0, 0), -1)
    #print "time3: %f" % (current_milli_time() - start_time)

    ## combining results of the three methods
    #score_horizonal_filtered = (score_horizonal_filtered_dense *
    #                            score_horizonal_filtered_LK *
    #                            score_horizonal_filtered_wall)
    score_horizonal_filtered = (score_horizonal_filtered_dense / 10 +
                                score_horizonal_filtered_LK * 10)
    opponent_x = np.argmax(score_horizonal_filtered)
    if 'opponent' in display_list:
        cv2.circle(img_opponent, (opponent_x, 220), 20, (200, 200, 200),
                   -1)
        zc.check_and_display('opponent', img_opponent, display_list,
                             is_resize=False,
                             wait_time=config.DISPLAY_WAIT_TIME)

    rtn_msg = {'status': 'success'}
    return (rtn_msg, opponent_x)
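# All three localizers above share one scoring pattern: per-pixel evidence
# (dense flow magnitude, tracked-corner motion, or a non-wall prior), already
# weighted so that lower rows count more, is collapsed into a per-column
# score, smoothed with a 120-tap box filter, and the opponent's x is the
# argmax. A minimal sketch of that pattern (illustrative, not the exact
# pipeline):
def _demo_column_score(evidence, filter_width=120):
    import numpy as np
    score_horizonal = np.sum(evidence, axis=0)            # collapse rows
    low_pass_h = np.ones(filter_width) / float(filter_width)  # box filter
    smoothed = np.convolve(score_horizonal, low_pass_h, mode='same')
    return int(np.argmax(smoothed))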
def _generate_guidance(self, header, state_info_str, engine_id):
    if config.RECOGNIZE_ONLY:
        return json.dumps({})  # no result assembled yet in this mode

    if self.is_first_frame:
        # do something special when the task begins
        result, img_guidance = self.task.get_first_guidance()
        result['image'] = b64encode(zc.cv_image2raw(img_guidance))
        zc.check_and_display('guidance', img_guidance, display_list,
                             wait_time=config.DISPLAY_WAIT_TIME,
                             resize_max=config.DISPLAY_MAX_PIXEL)
        self.is_first_frame = False
        header['status'] = result.pop('status')
        result.pop('animation', None)
        return json.dumps(result)

    header['status'] = "success"
    result = {}  # default

    state_info = json.loads(state_info_str)
    if not state_info['trust']:
        header['status'] = "fail"
        return json.dumps(result)
    state = state_info['state']
    if state == "None":
        header['status'] = "nothing"
        return json.dumps(result)
    bitmap = np.array(json.loads(state))

    ## try to commit bitmap
    state_change = False
    if bm.bitmap_same(self.commited_bitmap, bitmap):
        pass
    else:
        current_time = time.time()
        if not bm.bitmap_same(self.temp_bitmap['bitmap'], bitmap):
            self.temp_bitmap['bitmap'] = bitmap
            self.temp_bitmap['first_time'] = current_time
            self.temp_bitmap['count'] = 0
        self.temp_bitmap['count'] += 1
        if (current_time - self.temp_bitmap['first_time'] >
                config.BM_WINDOW_MIN_TIME or
                self.temp_bitmap['count'] >= config.BM_WINDOW_MIN_COUNT):
            self.commited_bitmap = self.temp_bitmap['bitmap']
            state_change = True

    bitmap = self.commited_bitmap
    if 'lego_syn' in display_list and bitmap is not None:
        img_syn = bm.bitmap2syn_img(bitmap)
        zc.display_image('lego_syn', img_syn,
                         wait_time=config.DISPLAY_WAIT_TIME,
                         resize_scale=50)

    ## now user has done something, provide some feedback
    img_guidance = None
    if state_change:
        self.task.update_state(bitmap)
        result, img_guidance = self.task.get_guidance()
        result['image'] = b64encode(zc.cv_image2raw(img_guidance))
        header['status'] = result.pop('status')
        result.pop('animation', None)

    if img_guidance is not None:
        zc.check_and_display('guidance', img_guidance, display_list,
                             wait_time=config.DISPLAY_WAIT_TIME,
                             resize_max=config.DISPLAY_MAX_PIXEL)

    return json.dumps(result)
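# Note: guidance images travel inside the JSON reply as base64 text
# (b64encode(zc.cv_image2raw(img_guidance))), which keeps the whole result a
# single text-safe payload for the transport, at the cost of roughly a 4/3
# size overhead compared to the raw bytes.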
def _handle_img(self, img):
    if self.is_first_frame and not config.RECOGNIZE_ONLY:
        # do something special when the task begins
        result, img_guidance = self.task.get_first_guidance()
        zc.check_and_display('guidance', img_guidance, display_list,
                             wait_time=config.DISPLAY_WAIT_TIME,
                             resize_max=config.DISPLAY_MAX_PIXEL)
        result['image'] = b64encode(zc.cv_image2raw(img_guidance))
        result.pop('animation', None)
        self.is_first_frame = False
        return json.dumps(result)

    result = {'status': "nothing"}  # default

    stretch_ratio = float(16) / 9 * img.shape[0] / img.shape[1]
    if img.shape != (config.IMAGE_WIDTH, config.IMAGE_HEIGHT, 3):
        img = cv2.resize(img, (config.IMAGE_WIDTH, config.IMAGE_HEIGHT),
                         interpolation=cv2.INTER_AREA)

    ## get bitmap for current image
    zc.check_and_display('input', img, display_list,
                         wait_time=config.DISPLAY_WAIT_TIME,
                         resize_max=config.DISPLAY_MAX_PIXEL)
    rtn_msg, bitmap = lc.process(img, stretch_ratio, display_list)
    if gabriel.Debug.TIME_MEASUREMENT:
        result[gabriel.Protocol_measurement.
               JSON_KEY_APP_SYMBOLIC_TIME] = time.time()
    if rtn_msg['status'] != 'success':
        print rtn_msg['message']
        if rtn_msg['message'] == ("Not confident about reconstruction, "
                                  "maybe too much noise"):
            self.counter['not_confident'] += 1
        return json.dumps(result)
    self.counter['confident'] += 1

    ## try to commit bitmap
    state_change = False
    if bm.bitmap_same(self.commited_bitmap, bitmap):
        pass
    else:
        current_time = time.time()
        if not bm.bitmap_same(self.temp_bitmap['bitmap'], bitmap):
            self.temp_bitmap['bitmap'] = bitmap
            self.temp_bitmap['first_time'] = current_time
            self.temp_bitmap['count'] = 0
            self.counter['diff_from_prev'] += 1
        else:
            self.counter['same_as_prev'] += 1
        self.temp_bitmap['count'] += 1
        if (current_time - self.temp_bitmap['first_time'] >
                config.BM_WINDOW_MIN_TIME or
                self.temp_bitmap['count'] >= config.BM_WINDOW_MIN_COUNT):
            self.commited_bitmap = self.temp_bitmap['bitmap']
            state_change = True
    #print "\n\n\n\n\n%s\n\n\n\n\n" % self.counter

    bitmap = self.commited_bitmap
    if 'lego_syn' in display_list and bitmap is not None:
        img_syn = bm.bitmap2syn_img(bitmap)
        zc.display_image('lego_syn', img_syn,
                         wait_time=config.DISPLAY_WAIT_TIME,
                         resize_scale=50)

    if config.RECOGNIZE_ONLY:
        return json.dumps(result)

    ## now user has done something, provide some feedback
    img_guidance = None
    if state_change:
        self.task.update_state(bitmap)
        # preserve the symbolic-time measurement across get_guidance
        # (assumes TIME_MEASUREMENT is on; the key is only set above in
        # that case)
        sym_time = result[
            gabriel.Protocol_measurement.JSON_KEY_APP_SYMBOLIC_TIME]
        result, img_guidance = self.task.get_guidance()
        result[gabriel.Protocol_measurement.
               JSON_KEY_APP_SYMBOLIC_TIME] = sym_time
        result['image'] = b64encode(zc.cv_image2raw(img_guidance))
        result.pop('animation', None)

    if img_guidance is not None:
        zc.check_and_display('guidance', img_guidance, display_list,
                             wait_time=config.DISPLAY_WAIT_TIME,
                             resize_max=config.DISPLAY_MAX_PIXEL)

    return json.dumps(result)
def _handle_img(self, img):
    result = {'status': "nothing"}  # default
    frame_time = current_milli_time()
    self.state['is_playing'] = (self.ball_trace.is_playing(frame_time) and
                                self.seen_opponent)
    if 'state' in display_list:
        pc.display_state(self.state)

    ## preprocessing of input image
    if max(img.shape) != config.IMAGE_MAX_WH:
        resize_ratio = float(config.IMAGE_MAX_WH) / max(img.shape)
        img = cv2.resize(img, (0, 0), fx=resize_ratio, fy=resize_ratio,
                         interpolation=cv2.INTER_AREA)
    zc.check_and_display('input', img, display_list,
                         resize_max=config.DISPLAY_MAX_PIXEL,
                         wait_time=config.DISPLAY_WAIT_TIME)
    #pc.check_image(img, display_list)

    ## check if two frames are too close
    if (self.prev_frame_info is not None and
            frame_time - self.prev_frame_info['time'] < 80):
        LOG.info(LOG_TAG + "two frames too close!")
        if gabriel.Debug.TIME_MEASUREMENT:
            result[gabriel.Protocol_measurement.
                   JSON_KEY_APP_SYMBOLIC_TIME] = time.time()
        return json.dumps(result)

    ## find table
    rtn_msg, objects = pc.find_table(img, display_list)
    if rtn_msg['status'] != 'success':
        LOG.info(LOG_TAG + rtn_msg['message'])
        if gabriel.Debug.TIME_MEASUREMENT:
            result[gabriel.Protocol_measurement.
                   JSON_KEY_APP_SYMBOLIC_TIME] = time.time()
        return json.dumps(result)
    img_rotated, mask_table, rotation_matrix = objects
    current_frame_info = {'time': frame_time,
                          'img': img,
                          'img_rotated': img_rotated,
                          'mask_ball': None}

    ## in case we don't have a good "previous" frame, process the current
    ## one and return
    mask_ball = None
    ball_stat = None
    if (self.prev_frame_info is None or
            frame_time - self.prev_frame_info['time'] > 300):
        LOG.info(LOG_TAG + "previous frame not good")
        rtn_msg, objects = pc.find_pingpong(img, None, mask_table, None,
                                            rotation_matrix, display_list)
        if rtn_msg['status'] != 'success':
            LOG.info(LOG_TAG + rtn_msg['message'])
        else:
            mask_ball, ball_stat = objects
        self.ball_trace.insert((frame_time, ball_stat))
        current_frame_info['mask_ball'] = mask_ball
        self.prev_frame_info = current_frame_info
        if gabriel.Debug.TIME_MEASUREMENT:
            result[gabriel.Protocol_measurement.
                   JSON_KEY_APP_SYMBOLIC_TIME] = time.time()
        return json.dumps(result)

    ## now we do have an okay previous frame
    rtn_msg, objects = pc.find_pingpong(img, self.prev_frame_info['img'],
                                        mask_table,
                                        self.prev_frame_info['mask_ball'],
                                        rotation_matrix, display_list)
    if rtn_msg['status'] != 'success':
        LOG.info(LOG_TAG + rtn_msg['message'])
    else:
        mask_ball, ball_stat = objects
    self.ball_trace.insert((frame_time, ball_stat))
    current_frame_info['mask_ball'] = mask_ball

    ## determine where the ball was hit to
    self.state['ball_position'] = self.ball_trace.leftOrRight()
    if 'state' in display_list:
        pc.display_state(self.state)

    ## find position (relatively, left or right) of your opponent
    rtn_msg, objects = pc.find_opponent(
        img_rotated, self.prev_frame_info['img_rotated'], display_list)
    if rtn_msg['status'] != 'success':
        self.seen_opponent = False
        if 'state' in display_list:
            pc.display_state(self.state)
        print rtn_msg['message']
        self.prev_frame_info = current_frame_info
        result[gabriel.Protocol_measurement.
               JSON_KEY_APP_SYMBOLIC_TIME] = time.time()
        return json.dumps(result)
    self.seen_opponent = True
    opponent_x = objects
    # a simple averaging over history
    self.opponent_x = self.opponent_x * 0.7 + opponent_x * 0.3
    self.state['opponent_position'] = (
        "left" if self.opponent_x < config.O_IMG_WIDTH * 0.58 else "right")
    if 'state' in display_list:
        pc.display_state(self.state)

    ## mode for not providing feedback
    if config.RECOGNIZE_ONLY:
        self.prev_frame_info = current_frame_info
        return json.dumps(result)

    ## now user has done something, provide some feedback
    result[gabriel.Protocol_measurement.
           JSON_KEY_APP_SYMBOLIC_TIME] = time.time()
    t = time.time()
    result['status'] = "success"
    if self.state['is_playing']:
        #if self.state['ball_position'] == "left" and \
        #        self.state['opponent_position'] == "left":
        if self.state['opponent_position'] == "left":
            if ((t - self.last_played_t < 3 and
                 self.last_played == "right") or
                    (t - self.last_played_t < 1)):
                result['status'] = "nothing"
                return json.dumps(result)
            result['speech'] = "right"
            print "\n\n\n\nright\n\n\n\n"
            self.last_played_t = t
            self.last_played = "right"
            if config.PLAY_SOUND:
                self.sound_sock.sendall("right")
        #elif self.state['ball_position'] == "right" and \
        #        self.state['opponent_position'] == "right":
        elif self.state['opponent_position'] == "right":
            if ((t - self.last_played_t < 3 and
                 self.last_played == "left") or
                    (t - self.last_played_t < 1)):
                result['status'] = "nothing"
                return json.dumps(result)
            result['speech'] = "left"
            print "\n\n\n\nleft\n\n\n\n"
            self.last_played_t = t
            self.last_played = "left"
            if config.PLAY_SOUND:
                # five-byte message, same length as "right"
                self.sound_sock.sendall("leftt")
        else:
            result['status'] = "nothing"
    else:
        result['status'] = "nothing"
    return json.dumps(result)
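# Sketch of the speech rate limiting above (hypothetical helper): a cue is
# suppressed if it repeats the previous cue within 3 seconds, or follows any
# cue within 1 second, so the player is not flooded with speech:
def _demo_should_speak(now, last_t, last_cue, new_cue):
    if now - last_t < 1:
        return False   # too soon after any cue
    if now - last_t < 3 and last_cue == new_cue:
        return False   # same cue repeated too quickly
    return True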