def preprocess_entry(self, entry):
    """Crop the eye region from a full frame and derive gaze labels.

    ``entry['full_image']`` and ``entry['json_data']`` are consumed (removed
    from ``entry``) and replaced by:

    * ``'gaze'``: output of ``util.gaze.vector_to_pitchyaw`` for the
      x-flipped look vector, with the second component remapped (see the
      inline comment below).
    * ``'eye'``: the cropped eye patch resized to ``self._eye_image_shape``,
      multiplied by 2/255 and shifted by -1, with one extra axis added
      (axis 0 for ``'NCHW'``, last axis otherwise).
    * ``'gazemaps'``: output of ``util.gazemap_uteyes.from_gaze2d`` for the
      eyelid and iris landmarks expressed relative to the crop origin; moved
      to channels-last when ``self.data_format == 'NHWC'``.

    No head-pose filtering is applied here: every entry is kept.

    Args:
        entry: dict with ``'full_image'`` (2-D array, unpacked as
            ``(height, width)``) and ``'json_data'`` (annotation dict
            providing ``'interior_margin_2d'``, ``'caruncle_2d'``,
            ``'iris_2d'`` and ``'eye_details'['look_vec']`` as strings).

    Returns:
        The same ``entry`` dict, mutated in place, with every value cast to
        ``np.float32``.
    """
    full_image = entry.pop('full_image')  # release cache
    json_data = entry.pop('json_data')

    # Unpacking also enforces that the image is 2-D (single channel).
    ih, _ = full_image.shape
    oh, ow = self._eye_image_shape

    def process_coords(coords_list):
        # NOTE(review): eval() executes arbitrary expressions. It is used
        # here to turn strings such as "(x, y, z)" into tuples; only feed
        # trusted annotation files (ast.literal_eval would be safer).
        coords = [eval(l) for l in coords_list]
        # Mirror y against the image height.
        return np.array([(x, ih - y, z) for (x, y, z) in coords])

    interior_landmarks = process_coords(json_data['interior_margin_2d'])
    caruncle_landmarks = process_coords(json_data['caruncle_2d'])
    iris_landmarks = process_coords(json_data['iris_2d'])

    # Prepare to segment eye image
    left_corner = np.mean(caruncle_landmarks[:, :2], axis=0)
    right_corner = interior_landmarks[8, :2]
    eye_width = 1.5 * abs(left_corner[0] - right_corner[0])
    # Fixed height/width ratio for the crop.
    eye_height = 0.6 * eye_width
    # Centre of the bounding box of the eyelid (interior margin) landmarks.
    eye_middle = np.mean([
        np.amin(interior_landmarks[:, :2], axis=0),
        np.amax(interior_landmarks[:, :2], axis=0),
    ], axis=0)

    # Crop rectangle; abs() keeps indices non-negative when the box would
    # start outside the image.
    top_l_x = abs(int(eye_middle[0] - eye_width / 2.0))
    top_l_y = abs(int(eye_middle[1] - eye_height / 2.0))
    down_r_x = abs(int(eye_middle[0] + eye_width / 2.0))
    down_r_y = abs(int(eye_middle[1] + eye_height / 2.0))

    # Segment eye image
    eye = full_image[top_l_y:down_r_y, top_l_x:down_r_x]

    # Convert look vector to gaze direction in polar angles
    look_vec = np.array(eval(json_data['eye_details']['look_vec']))[:3]
    look_vec[0] = -look_vec[0]
    gaze = util.gaze.vector_to_pitchyaw(look_vec.reshape((1, 3))).flatten()
    # Remap the second component (presumably yaw): v -> pi - v for v > 0,
    # v -> -(pi + v) for v < 0; zero is left unchanged.
    if gaze[1] > 0.0:
        gaze[1] = np.pi - gaze[1]
    elif gaze[1] < 0.0:
        gaze[1] = -(np.pi + gaze[1])
    entry['gaze'] = gaze.astype(np.float32)

    # Resize eye image and normalize intensities.
    eye = cv.resize(eye, (ow, oh))
    eye = eye.astype(np.float32)
    eye *= 2.0 / 255.0
    eye -= 1.0
    eye = np.expand_dims(
        eye, axis=0 if self.data_format == 'NCHW' else -1)
    entry['eye'] = eye

    # Select landmark coordinates and express them relative to the crop
    # origin. Counts in the trailing comments are the original author's.
    landmarks = np.concatenate([
        interior_landmarks[:, :2],  # 16
        iris_landmarks[:, :2],  # 32
    ])  # 48 in total
    landmarks = landmarks - [top_l_x, top_l_y]

    # `intput_size` (sic) is the keyword this call site has always used;
    # verify against util.gazemap_uteyes.from_gaze2d before renaming.
    entry['gazemaps'] = util.gazemap_uteyes.from_gaze2d(
        landmarks,
        intput_size=(int(eye_height), int(eye_width)),
        output_size=(oh, ow),
        scale=0.5,
    ).astype(np.float32)
    if self.data_format == 'NHWC':
        # np.transpose returns a new array/view; the result must be stored
        # (previously it was discarded, so NHWC had no effect).
        entry['gazemaps'] = np.transpose(entry['gazemaps'], (1, 2, 0))

    # Ensure all values in an entry are 4-byte floating point numbers
    for key, value in entry.items():
        entry[key] = value.astype(np.float32)
    return entry
def preprocess_entry(self, entry):
    """Segment and augment the eye image, and compute gaze and landmarks.

    ``entry['full_image']`` and ``entry['json_data']`` are consumed (removed
    from ``entry``). Entries whose corrected head pitch or yaw exceeds 20 in
    magnitude (presumably degrees) are rejected. Otherwise the full frame is
    warped to ``self._eye_image_shape`` with optional rotation, scale and
    translation noise, then degraded with random lines, down/up-scaling,
    intensity noise and blur. The strength of every augmentation is taken
    from ``self._augmentation_ranges`` interpolated by ``self._difficulty``.

    Keys written to ``entry``:

    * ``'radius'``: eyeball radius in output-image pixels.
    * ``'gaze'``: pitch/yaw of the (rotated) look vector, as float32.
    * ``'eye'``: histogram-equalised image scaled by 2/255 and shifted by
      -1, with a channel axis (last for ``'NHWC'``, first otherwise).
    * ``'landmarks'``: transformed (x, y) landmark coordinates, float32.
    * ``'heatmaps'``: only when ``self._generate_heatmaps`` is truthy; one
      Gaussian map per landmark, channels-last for ``'NHWC'``.

    Args:
        entry: dict with ``'full_image'`` (2-D array, unpacked as
            ``(height, width)``) and ``'json_data'`` (annotation dict
            providing ``'interior_margin_2d'``, ``'caruncle_2d'``,
            ``'iris_2d'``, ``'head_pose'`` and
            ``'eye_details'['look_vec']`` as strings).

    Returns:
        The mutated ``entry`` dict, or ``None`` when the head pose is not
        close to frontal (the two input keys have already been removed from
        ``entry`` at that point).
    """
    full_image = entry.pop('full_image')
    json_data = entry.pop('json_data')

    ih, iw = full_image.shape
    iw_2, ih_2 = 0.5 * iw, 0.5 * ih
    oh, ow = self._eye_image_shape

    def process_coords(coords_list):
        # NOTE(review): eval() executes arbitrary expressions. It is used
        # here to turn strings such as "(x, y, z)" into tuples; only feed
        # trusted annotation files (ast.literal_eval would be safer).
        coords = [eval(l) for l in coords_list]
        # Mirror y against the image height.
        return np.array([(x, ih - y, z) for (x, y, z) in coords])

    interior_landmarks = process_coords(json_data['interior_margin_2d'])
    caruncle_landmarks = process_coords(json_data['caruncle_2d'])
    iris_landmarks = process_coords(json_data['iris_2d'])

    # Pool of standard-normal draws shared by all noisy augmentations of
    # this entry; refilled lazily when exhausted.
    random_multipliers = []

    def value_from_type(augmentation_type):
        # Interpolate between the "easy" and "hard" setting by difficulty
        # and clamp to that range (either bound may be the larger one).
        easy_value, hard_value = self._augmentation_ranges[augmentation_type]
        value = (hard_value - easy_value) * self._difficulty + easy_value
        return np.clip(value,
                       min(easy_value, hard_value),
                       max(easy_value, hard_value))

    def noisy_value_from_type(augmentation_type):
        # Scale the augmentation magnitude by a normally distributed factor.
        if not random_multipliers:
            random_multipliers.extend(
                np.random.normal(size=(len(self._augmentation_ranges),)))
        return random_multipliers.pop() * value_from_type(augmentation_type)

    # Only select almost frontal images
    h_pitch, h_yaw, _ = eval(json_data['head_pose'])
    if h_pitch > 180.0:  # Need to correct pitch
        h_pitch -= 360.0
    h_yaw -= 180.0  # Need to correct yaw
    if abs(h_pitch) > 20 or abs(h_yaw) > 20:
        return None

    # Prepare to segment eye image
    left_corner = np.mean(caruncle_landmarks[:, :2], axis=0)
    right_corner = interior_landmarks[8, :2]
    eye_width = 1.5 * abs(left_corner[0] - right_corner[0])
    # Centre of the bounding box of the eyelid (interior margin) landmarks.
    eye_middle = np.mean([
        np.amin(interior_landmarks[:, :2], axis=0),
        np.amax(interior_landmarks[:, :2], axis=0),
    ], axis=0)

    # Centre axes to eyeball centre
    translate_mat = np.asmatrix(np.eye(3))
    translate_mat[:2, 2] = [[-iw_2], [-ih_2]]

    # Rotate eye image if requested (only positive noise draws rotate)
    rotate_mat = np.asmatrix(np.eye(3))
    rotation_noise = noisy_value_from_type('rotation')
    if rotation_noise > 0:
        rotate_angle = np.radians(rotation_noise)
        cos_rotate = np.cos(rotate_angle)
        sin_rotate = np.sin(rotate_angle)
        rotate_mat[0, 0] = cos_rotate
        rotate_mat[0, 1] = -sin_rotate
        rotate_mat[1, 0] = sin_rotate
        rotate_mat[1, 1] = cos_rotate

    # Scale image to fit output dimensions (with a little bit of noise)
    scale_mat = np.asmatrix(np.eye(3))
    scale = 1. + noisy_value_from_type('scale')
    scale_inv = 1. / scale
    np.fill_diagonal(scale_mat, ow / eye_width * scale)
    original_eyeball_radius = 71.7593
    # See: https://goo.gl/ZnXgDE
    eyeball_radius = original_eyeball_radius * scale_mat[0, 0]
    entry['radius'] = np.float32(eyeball_radius)

    # Re-centre eye image such that eye fits (based on determined
    # `eye_middle`)
    recentre_mat = np.asmatrix(np.eye(3))
    recentre_mat[0, 2] = (iw / 2 - eye_middle[0]
                          + 0.5 * eye_width * scale_inv)
    recentre_mat[1, 2] = (ih / 2 - eye_middle[1]
                          + 0.5 * oh / ow * eye_width * scale_inv)
    recentre_mat[0, 2] += noisy_value_from_type('translation')  # x
    recentre_mat[1, 2] += noisy_value_from_type('translation')  # y

    # Apply transforms
    transform_mat = recentre_mat * scale_mat * rotate_mat * translate_mat
    eye = cv.warpAffine(full_image, transform_mat[:2, :3], (ow, oh))

    # Convert look vector to gaze direction in polar angles
    look_vec = np.array(eval(json_data['eye_details']['look_vec']))[:3]
    look_vec[0] = -look_vec[0]
    # Gaze before the in-plane rotation; used below to place the iris
    # centre in the un-warped image.
    original_gaze = util.gaze.vector_to_pitchyaw(
        look_vec.reshape((1, 3))).flatten()
    look_vec = rotate_mat * look_vec.reshape(3, 1)
    gaze = util.gaze.vector_to_pitchyaw(look_vec.reshape((1, 3))).flatten()
    # Remap the second component (presumably yaw): v -> pi - v for v > 0,
    # v -> -(pi + v) for v < 0; zero is left unchanged.
    if gaze[1] > 0.0:
        gaze[1] = np.pi - gaze[1]
    elif gaze[1] < 0.0:
        gaze[1] = -(np.pi + gaze[1])
    entry['gaze'] = gaze.astype(np.float32)

    # Draw line randomly
    num_line_noise = int(np.round(noisy_value_from_type('num_line')))
    if num_line_noise > 0:
        # Five uniform draws per line: start x, end y, direction, colour,
        # thickness.
        line_rand_nums = np.random.rand(5 * num_line_noise)
        for i in range(num_line_noise):
            j = 5 * i
            lx0, ly0 = int(ow * line_rand_nums[j]), oh
            lx1, ly1 = ow, int(oh * line_rand_nums[j + 1])
            direction = line_rand_nums[j + 2]
            if direction < 0.25:
                lx1 = ly0 = 0
            elif direction < 0.5:
                lx1 = 0
            elif direction < 0.75:
                ly0 = 0
            line_colour = int(255 * line_rand_nums[j + 3])
            eye = cv.line(
                eye, (lx0, ly0), (lx1, ly1),
                color=(line_colour, line_colour, line_colour),
                # cv.line requires a positive thickness; int(6 * u) is 0
                # for u < 1/6, so clamp to at least one pixel.
                thickness=max(1, int(6 * line_rand_nums[j + 4])),
                lineType=cv.LINE_AA)

    # Rescale image if required
    rescale_max = value_from_type('rescale')
    if rescale_max < 1.0:
        rescale_noise = np.random.uniform(low=rescale_max, high=1.0)
        interpolation = cv.INTER_CUBIC
        eye = cv.resize(eye, dsize=(0, 0), fx=rescale_noise,
                        fy=rescale_noise, interpolation=interpolation)
        eye = cv.equalizeHist(eye)
        eye = cv.resize(eye, dsize=(ow, oh), interpolation=interpolation)

    # Add rgb noise to eye image
    intensity_noise = int(value_from_type('intensity'))
    if intensity_noise > 0:
        # Widen to int16 so the additive noise cannot wrap around.
        eye = eye.astype(np.int16)
        eye += np.random.randint(low=-intensity_noise,
                                 high=intensity_noise,
                                 size=eye.shape, dtype=np.int16)
        cv.normalize(eye, eye, alpha=0, beta=255, norm_type=cv.NORM_MINMAX)
        eye = eye.astype(np.uint8)

    # Add blur to eye image
    blur_noise = noisy_value_from_type('blur')
    if blur_noise > 0:
        eye = cv.GaussianBlur(eye, (7, 7), 0.5 + np.abs(blur_noise))

    # Histogram equalization and preprocessing for NN
    eye = cv.equalizeHist(eye)
    eye = eye.astype(np.float32)
    eye *= 2.0 / 255.0
    eye -= 1.0
    eye = np.expand_dims(eye, -1 if self.data_format == 'NHWC' else 0)
    entry['eye'] = eye

    # Select and transform landmark coordinates
    iris_centre = np.asarray([
        iw_2 + original_eyeball_radius *
        -np.cos(original_gaze[0]) * np.sin(original_gaze[1]),
        ih_2 + original_eyeball_radius * -np.sin(original_gaze[0]),
    ])
    landmarks = np.concatenate([
        interior_landmarks[::2, :2],  # 8
        iris_landmarks[::4, :2],  # 8
        iris_centre.reshape((1, 2)),
        [[iw_2, ih_2]],  # Eyeball centre
    ])  # 18 in total
    # Append a homogeneous coordinate so the 3x3 transform can be applied.
    landmarks = np.asmatrix(
        np.pad(landmarks, ((0, 0), (0, 1)), 'constant', constant_values=1))
    landmarks = np.asarray(landmarks * transform_mat.T)
    landmarks = landmarks[:, :2]  # We only need x, y
    entry['landmarks'] = landmarks.astype(np.float32)

    # Generate heatmaps if necessary
    if self._generate_heatmaps:
        # Should be half-scale (compared to eye image)
        entry['heatmaps'] = np.asarray([
            util.heatmap.gaussian_2d(
                shape=(self._heatmaps_scale * oh,
                       self._heatmaps_scale * ow),
                centre=self._heatmaps_scale * landmark,
                sigma=value_from_type('heatmap_sigma'),
            ) for landmark in entry['landmarks']
        ]).astype(np.float32)
        if self.data_format == 'NHWC':
            entry['heatmaps'] = np.transpose(entry['heatmaps'], (1, 2, 0))

    return entry
def preprocess_unityeyes_image(img, json_data):
    """Turn a UnityEyes frame and its annotations into one training sample.

    The frame is converted to grayscale, warped so that the eye is roughly
    centred in a 160x96 patch (with a uniform jitter of up to 10 pixels per
    axis), blurred with a random sigma, histogram-equalised and scaled to
    [0, 1]. Landmarks are mapped through the same transform, rescaled to
    the 80x48 heatmap grid and stored as (y, x) pairs.

    Args:
        img: BGR image array (it is passed to ``cv2.cvtColor`` with
            ``COLOR_BGR2GRAY``).
        json_data: annotation dict providing ``'interior_margin_2d'``,
            ``'caruncle_2d'``, ``'iris_2d'`` and
            ``'eye_details'['look_vec']`` as strings.

    Returns:
        dict with keys ``'img'``, ``'transform'``, ``'transform_inv'``,
        ``'eye_middle'``, ``'heatmaps'``, ``'landmarks'`` and ``'gaze'``.
    """
    out_w, out_h = 160, 96
    # The landmark buffer and the heatmap assert below are hard-wired to 34
    # points (all eyelid points, every second iris point, iris centre and
    # eyeball centre).
    num_landmarks = 34

    # Prepare to segment eye image
    src_h, src_w = img.shape[:2]
    centre_y, centre_x = src_h / 2.0, src_w / 2.0
    hm_w, hm_h = int(out_w / 2), int(out_h / 2)

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    def to_image_coords(raw_points):
        # Parse "(x, y, z)" strings and mirror y against the image height.
        parsed = []
        for text in raw_points:
            x, y, z = eval(text)
            parsed.append((x, src_h - y, z))
        return np.array(parsed)

    eyelid_pts = to_image_coords(json_data['interior_margin_2d'])
    caruncle_pts = to_image_coords(json_data['caruncle_2d'])
    iris_pts = to_image_coords(json_data['iris_2d'])

    inner_corner = np.mean(caruncle_pts[:, :2], axis=0)
    outer_corner = eyelid_pts[8, :2]
    eye_width = 1.5 * abs(inner_corner[0] - outer_corner[0])

    # Centre of the eyelid bounding box.
    eyelid_xy = eyelid_pts[:, :2]
    eye_middle = (np.amin(eyelid_xy, axis=0) + np.amax(eyelid_xy, axis=0)) / 2.0

    # Normalize to eye width.
    scale = out_w / eye_width

    # Moves the (already scaled) eye centre to the origin.
    to_origin = np.asmatrix(np.array([
        [1.0, 0.0, -eye_middle[0] * scale],
        [0.0, 1.0, -eye_middle[1] * scale],
        [0.0, 0.0, 1.0],
    ]))

    # Random jitter of the eye position inside the output patch.
    jitter_x = np.random.uniform(low=-10, high=10)
    jitter_y = np.random.uniform(low=-10, high=10)
    to_patch_centre = np.asmatrix(np.array([
        [1.0, 0.0, out_w / 2 + jitter_x],
        [0.0, 1.0, out_h / 2 + jitter_y],
        [0.0, 0.0, 1.0],
    ]))

    scaling = np.asmatrix(np.diag([scale, scale, 1.0]))

    # In-plane rotation is currently disabled (angle fixed at zero).
    angle = 0
    rotation = R.from_rotvec([0, 0, angle]).as_matrix()

    transform = to_patch_centre * rotation * to_origin * scaling
    transform_inv = np.linalg.inv(transform)

    # Apply transforms
    patch = cv2.warpAffine(gray, transform[:2], (out_w, out_h))

    blur_sigma = np.random.uniform(low=0, high=20)
    patch = cv2.GaussianBlur(patch, (5, 5), blur_sigma)

    # Normalize eye image
    patch = cv2.equalizeHist(patch)
    patch = patch.astype(np.float32) / 255.0

    # Gaze: convert the negated look vector to polar angles.
    look_vec = np.array(eval(json_data['eye_details']['look_vec']))
    look_vec = look_vec[:3].reshape((1, 3))
    gaze = util.gaze.vector_to_pitchyaw(-look_vec).flatten()
    gaze = gaze.astype(np.float32)

    iris_center = np.mean(iris_pts[:, :2], axis=0)
    points_xy = np.concatenate([
        eyelid_pts[:, :2],  # all eyelid points
        iris_pts[::2, :2],  # every second iris point
        iris_center.reshape((1, 2)),
        [[centre_x, centre_y]],  # Eyeball center
    ])
    # Homogeneous coordinates so the 3x3 transform can be applied.
    homogeneous = np.asmatrix(
        np.hstack([points_xy, np.ones((points_xy.shape[0], 1))]))
    heatmap_scale = np.array([hm_w / out_w, hm_h / out_h])
    points_xy = np.asarray(homogeneous * transform[:2].T) * heatmap_scale
    points_xy = points_xy.astype(np.float32)

    # Swap columns so that landmarks are in (y, x), not (x, y).
    # This is because the network outputs landmarks as (y, x) values.
    landmarks = np.zeros((num_landmarks, 2), dtype=np.float32)
    landmarks[:, 0] = points_xy[:, 1]
    landmarks[:, 1] = points_xy[:, 0]

    heatmaps = get_heatmaps(w=hm_w, h=hm_h, landmarks=landmarks)
    assert heatmaps.shape == (34, hm_h, hm_w)

    return {
        'img': patch,
        'transform': np.asarray(transform),
        'transform_inv': np.asarray(transform_inv),
        'eye_middle': np.asarray(eye_middle),
        'heatmaps': np.asarray(heatmaps),
        'landmarks': np.asarray(landmarks),
        'gaze': np.asarray(gaze),
    }