def cluster(self, gaze_points_list, inplace=False):
    """
    Find clusters of input gaze data and label clustered points as smooth pursuit.

    Labels (sets the 'EYE_MOVEMENT_TYPE' field) the clusters of data points as 'SP',
    other samples as 'NOISE_CLUSTER'. A new column 'CLUSTER_ID' is added into the @DATA
    section of each arff object in @gaze_points_list, indicating the cluster group ID.

    :param gaze_points_list: a list of arff objects (dictionaries with fields such as
                             'data' and 'metadata')
    :param inplace: whether to modify the original input gaze data with the gaze data
                    after clustering, or to work on a copy
    :return: gaze data after clustering, in the same form as the input data.
    """
    if not inplace:
        gaze_points_list = copy.deepcopy(gaze_points_list)

    # add global indexing to be able to reference a particular sample even after
    # clustering all recordings in one structure
    for ind in xrange(len(gaze_points_list)):
        ArffHelper.add_column(gaze_points_list[ind],
                              name='global_index',
                              dtype='INTEGER',
                              default_value=-1)
        gaze_points_list[ind]['data']['global_index'] = np.arange(
            gaze_points_list[ind]['data'].shape[0])

    self._setup_internal_parameters(gaze_points_list)
    self._data_set = self._aggregate_data(gaze_points_list)
    # has to be a copy, so that it is placed contiguously in memory
    self._timestamps = self._data_set['time'].copy()

    current_cluster_id = 0
    for i in xrange(len(self._data_set)):
        if self._data_set[i]['visited_flag'] == 1:
            continue
        else:
            self._data_set[i]['visited_flag'] = 1
            neighbourhood = self._get_neighbourhood(i)
            if self._validate_neighbourhood(neighbourhood):
                # if not: the current point is left marked as noise
                self._expand_cluster(i, neighbourhood, current_cluster_id)
                current_cluster_id += 1

    # create a new column in gaze_points_list for CLUSTER_ID
    for i in xrange(len(gaze_points_list)):
        ArffHelper.add_column(gaze_points_list[i], 'CLUSTER_ID', 'NUMERIC', -1)

    # label data in gaze_points_list as SP according to CLUSTER_ID
    for i in xrange(len(self._data_set)):
        observer_id = int(self._data_set[i]['observer_id'])
        global_index = self._data_set[i]['global_index']
        if self._data_set[i]['CLUSTER_ID'] != -1:
            gaze_points_list[observer_id]['data']['EYE_MOVEMENT_TYPE'][global_index] = 'SP'
            gaze_points_list[observer_id]['data']['CLUSTER_ID'][global_index] = \
                self._data_set[i]['CLUSTER_ID']
        else:
            gaze_points_list[observer_id]['data']['EYE_MOVEMENT_TYPE'][global_index] = 'NOISE_CLUSTER'

    # can now remove the global_index column
    for ind in xrange(len(gaze_points_list)):
        ArffHelper.remove_column(gaze_points_list[ind], name='global_index')
    return gaze_points_list
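# A minimal usage sketch for the clustering step above (not part of the original code).
# The detector class name (SmoothPursuitDetector) and the input file names are illustrative
# assumptions only; ArffHelper.load and the module-level imports (np, ArffHelper) are used
# as in the rest of the code base.
def _example_cluster_usage(arff_file_names=('observer_00.arff', 'observer_01.arff')):
    recordings = [ArffHelper.load(open(fname)) for fname in arff_file_names]
    detector = SmoothPursuitDetector()  # hypothetical class exposing the cluster() method above
    clustered = detector.cluster(recordings, inplace=False)
    # every sample now carries a CLUSTER_ID (-1 for noise) and an updated EYE_MOVEMENT_TYPE
    return [(rec['data']['EYE_MOVEMENT_TYPE'] == 'SP').sum() for rec in clustered]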
def add_eye_movement_attribute(arff_object):
    """
    Add the EYE_MOVEMENT_TYPE attribute to the @arff_object. If already present, do nothing.

    :param arff_object: arff object
    :return: arff object with an added column for the eye movement type
    """
    from recording_processor import EM_TYPE_ATTRIBUTE_NAME, EM_TYPE_ARFF_DATA_TYPE, EM_TYPE_DEFAULT_VALUE
    if 'EYE_MOVEMENT_TYPE' not in arff_object['data'].dtype.names:
        ArffHelper.add_column(arff_object,
                              EM_TYPE_ATTRIBUTE_NAME,
                              EM_TYPE_ARFF_DATA_TYPE,
                              EM_TYPE_DEFAULT_VALUE)
    return arff_object
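# A short, hedged illustration of the helper above: the column is only added when it is
# missing, so calling the helper twice is safe. The ARFF file path is only an example
# (it is the one used in the test below).
def _example_add_eye_movement_attribute(arff_file_name='test_data/arff_data_example.arff'):
    arff_obj = ArffHelper.load(open(arff_file_name))
    arff_obj = add_eye_movement_attribute(arff_obj)  # adds EYE_MOVEMENT_TYPE if absent
    arff_obj = add_eye_movement_attribute(arff_obj)  # second call is a no-op
    assert 'EYE_MOVEMENT_TYPE' in arff_obj['data'].dtype.names
    return arff_obj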
def test_add_column(self):
    name = 'EM_type'
    dtype = ['UNKNOWN', 'FIX', 'SACCADE', 'SP', 'NOISE']
    a = ArffHelper.load(open('test_data/arff_data_example.arff'))
    # record the expected attribute list before add_column (potentially) modifies @a in place
    expected_attributes = a['attributes'] + [(name, dtype)]
    b = ArffHelper.add_column(a, name, dtype, 'UNKNOWN')
    self.assertEqual(expected_attributes, b['attributes'])
def detect(self, gaze_points, inplace=False):
    """
    This method labels saccades (and some noise) in the provided gaze_points, which should be
    an arff object.

    :param gaze_points: gaze recording data, an arff object (i.e. a dictionary with 'data',
                        'metadata' and similar keys)
    :param inplace: whether to replace the data inside @gaze_points or create a new structure
    :return: gaze points with added SACCADE and NOISE labels
    """
    if not inplace:
        gaze_points = copy.deepcopy(gaze_points)

    # also keep track of saccadic and intersaccadic intervals
    detected_saccades_count = 0
    if 'SACC_INTERVAL_INDEX' not in gaze_points['data'].dtype.names:
        ArffHelper.add_column(gaze_points, 'SACC_INTERVAL_INDEX', 'INTEGER', -1)
    # a virtual saccade that finished before the recording started, for uniform processing
    last_saccade_end = -1
    intersaccadic_intervals_count = 0
    if 'INTERSACC_INTERVAL_INDEX' not in gaze_points['data'].dtype.names:
        ArffHelper.add_column(gaze_points, 'INTERSACC_INTERVAL_INDEX', 'INTEGER', -1)

    # verify that the timestamps are sorted!
    times = gaze_points['data']['time']
    assert all(times[i] <= times[i + 1] for i in xrange(len(times) - 1)), \
        'Timestamps are not sorted in {}'.format(gaze_points['metadata']['filename'])

    # -1 so that the exact value ends up on the right of the searched timestamp
    searchable_timestamps = times - self.VELOCITY_INTEGRAL_INTERVAL_MICROSEC - 1
    # find, for every sample, the index of the first sample within its velocity integration interval
    prev_indices = np.searchsorted(times, searchable_timestamps, side='right')
    cur_indices = np.arange(len(prev_indices))
    # if the index after the search points towards this very data point, take the previous one
    prev_indices[prev_indices == cur_indices] -= 1
    # except for the very first sample
    prev_indices[0] = 0

    # computing velocities
    x_shifts = gaze_points['data']['x'][cur_indices] - gaze_points['data']['x'][prev_indices]
    y_shifts = gaze_points['data']['y'][cur_indices] - gaze_points['data']['y'][prev_indices]
    shifts = np.linalg.norm(np.vstack([x_shifts, y_shifts]), axis=0)
    time_shifts = gaze_points['data']['time'][cur_indices] - gaze_points['data']['time'][prev_indices]
    # keep it above 0, the shifts are 0 there anyway
    time_shifts[time_shifts == 0] += 1
    velocities = shifts / time_shifts  # pixels per microsecond
    ppd = util.calculate_ppd(gaze_points)
    velocities /= ppd  # degrees per microsecond
    velocities *= 1e6  # degrees per second

    # How many samples back is it reasonable to go?
    time_step = np.diff(times).mean()
    # a big margin of error: 10 times as many samples as we would normally need
    extra_samples_count = int(np.round((self.MAX_DURATION_MICROSEC * 10) / time_step))

    # Glitch detection: glitches are defined by one of several features.
    #
    # (1) Coordinates far outside the calibrated region (what constitutes "far" is defined
    # by the tolerance parameter) are assumed to be erroneous.
    is_glitch = np.zeros(gaze_points['data'].shape[0], dtype=bool)
    is_glitch[gaze_points['data']['x'] <
              -gaze_points['metadata']['width_px'] * self.TOLERANCE] = True
    is_glitch[gaze_points['data']['y'] <
              -gaze_points['metadata']['height_px'] * self.TOLERANCE] = True
    is_glitch[gaze_points['data']['x'] >
              gaze_points['metadata']['width_px'] * (1 + self.TOLERANCE)] = True
    is_glitch[gaze_points['data']['y'] >
              gaze_points['metadata']['height_px'] * (1 + self.TOLERANCE)] = True
    # (2) If the @gaze_points supports the estimate of a confidence measure for samples,
    # a confidence lower than 0.1 also indicates a glitch.
    if 'confidence' in gaze_points['data'].dtype.names:
        is_glitch[gaze_points['data']['confidence'] < 0.1] = True
    # (3) Finally, velocities that exceed self.MAX_SPEED_DEGREE_PER_SEC (currently set to
    # ~1000 degrees/s) are regarded as glitches as well and labelled as noise.
    is_glitch[velocities > self.MAX_SPEED_DEGREE_PER_SEC] = True
    gaze_points['data']['EYE_MOVEMENT_TYPE'][velocities > self.MAX_SPEED_DEGREE_PER_SEC] = 'NOISE'

    # Remember the first sample after a glitch: to prevent saccade detection at the first
    # non-glitch sample that follows, saccade detection is inhibited for that sample.
    post_glitch = np.diff(is_glitch.astype(int)) == -1
    post_glitch = np.hstack(([False], post_glitch))
    # Remember the last sample before a glitch: since we normally would suspend the other
    # criteria (incl. speed) inside a glitch, we try to avoid border effects in both the
    # next-after and last-before glitch samples.
    pre_glitch = np.diff(is_glitch.astype(int)) == 1
    pre_glitch = np.hstack((pre_glitch, [False]))
    all_glitch = is_glitch + post_glitch + pre_glitch
    # we will set glitch samples' labels to NOISE after the saccades have been detected

    # recompute speeds for post-glitch samples
    pre_glitch_indices = np.nonzero(pre_glitch)[0]
    for i in np.nonzero(post_glitch)[0]:
        # find the corresponding start of the glitch
        corresponding_pre_glitch = np.searchsorted(pre_glitch_indices, i) - 1
        if corresponding_pre_glitch < 0:
            # no correspondence found, it's a glitch from the beginning of the recording ==> set velocity to 0
            velocities[i] = 0
        else:
            # found a completed glitch; convert the position within @pre_glitch_indices
            # into the index of the corresponding data sample
            pre_glitch_sample = pre_glitch_indices[corresponding_pre_glitch]
            velocities[i] = np.linalg.norm([
                gaze_points['data']['x'][i] - gaze_points['data']['x'][pre_glitch_sample],
                gaze_points['data']['y'][i] - gaze_points['data']['y'][pre_glitch_sample]
            ]) / (times[i] - times[pre_glitch_sample])  # pixels per microsecond
            velocities[i] /= ppd  # degrees per microsecond
            velocities[i] *= 1e6  # degrees per second

    # Looking for saccade seed points. A saccade seed point should
    # (1) exceed the fast onset threshold,
    # (2) be biologically plausible,
    # (3) not be inside a glitch.
    saccade_seeds = (velocities > self.THRESHOLD_ONSET_FAST_DEGREE_PER_SEC) * \
                    (velocities < self.MAX_SPEED_DEGREE_PER_SEC) * \
                    (1 - all_glitch)
    saccade_seed_indices = np.nonzero(saccade_seeds)[0]
    for potential_seed_index in saccade_seed_indices:
        if gaze_points['data']['EYE_MOVEMENT_TYPE'][potential_seed_index] != 'UNKNOWN':
            # already labelled before, e.g. as part of a saccade that started from another seed point
            continue
        if self.verbose == 2:
            print >> sys.stderr, 'potential seed index', potential_seed_index

        # Looking for the onset:
        # (1) should be above the slow threshold speed,
        # (2) should not be a glitch,
        # (3) should not yet have a label.
        onset_candidates_check = \
            (velocities[max(0, potential_seed_index - extra_samples_count):potential_seed_index] >=
             self.THRESHOLD_ONSET_SLOW_DEGREE_PER_SEC) * \
            (1 - is_glitch[max(0, potential_seed_index - extra_samples_count):potential_seed_index]) * \
            (gaze_points['data']['EYE_MOVEMENT_TYPE'][
                 max(0, potential_seed_index - extra_samples_count):potential_seed_index] == 'UNKNOWN')
        # find the last zero (the next sample after it is the beginning of the last
        # uninterrupted 1-sequence, i.e. the saccade onset)
        try:
            last_zero_index = np.nonzero(1 - onset_candidates_check)[0][-1]
        except IndexError:
            # not found
            continue
        saccade_onset_index = last_zero_index + 1 + \
            max(0, potential_seed_index - extra_samples_count)  # shift accordingly
        # also, this should not be a glitch or a post-/pre-glitch sample
        while all_glitch[saccade_onset_index]:
            saccade_onset_index += 1

        # Looking for the offset:
        # (1) should be above the offset speed threshold,
        # (2) should not exceed the biologically plausible duration threshold,
        # (3) should not yet have a label (i.e. not the NOISE labelled above).
        offset_candidates_check = \
            (velocities[potential_seed_index:potential_seed_index + extra_samples_count] >=
             self.THRESHOLD_OFFSET_DEGREE_PER_SEC) * \
            (times[potential_seed_index:potential_seed_index + extra_samples_count] -
             times[saccade_onset_index] <= self.MAX_DURATION_MICROSEC)
        # we ignore the criterion around the glitch
        offset_candidates_check += is_glitch[potential_seed_index:potential_seed_index + extra_samples_count]
        offset_candidates_check += post_glitch[potential_seed_index:potential_seed_index + extra_samples_count]
        # but there should not yet be a label present, i.e. it is not the NOISE labelled above
        offset_candidates_check *= \
            (gaze_points['data']['EYE_MOVEMENT_TYPE'][
                 potential_seed_index:potential_seed_index + extra_samples_count] == 'UNKNOWN')
        # find the first zero (this is the first sample with speed below the threshold,
        # i.e. the saccade offset)
        try:
            saccade_offset_index = np.nonzero(1 - offset_candidates_check)[0][0]
        except IndexError:
            # no offset found
            continue
        # the index was counted from potential_seed_index
        saccade_offset_index += potential_seed_index
        # if we finished inside a glitch, we have reached a biological limit of sorts ==> discard
        if is_glitch[saccade_offset_index]:
            continue
        if self.verbose == 2:
            print >> sys.stderr, 'Found onset/offset indices', saccade_onset_index, saccade_offset_index

        # now validate the saccade parameters
        # (1) it spans at least the minimal necessary interval
        saccade_time = times[saccade_offset_index] - times[saccade_onset_index]
        if saccade_time < self.MIN_DURATION_MICROSEC:
            # If the resulting saccade is shorter than the minimal duration, we assume that we
            # have only encountered some noise impulse and discard this saccade.
            gaze_points['data']['EYE_MOVEMENT_TYPE'][saccade_onset_index:saccade_offset_index + 1] = 'NOISE'
            if self.verbose == 2:
                print >> sys.stderr, 'Discarding due to low duration: needed {}, had {}'.\
                    format(self.MIN_DURATION_MICROSEC, saccade_time)
            continue
        # (2) the mean velocity is not below the slow onset threshold
        saccade_displacement = np.linalg.norm([
            gaze_points['data']['x'][saccade_offset_index] - gaze_points['data']['x'][saccade_onset_index],
            gaze_points['data']['y'][saccade_offset_index] - gaze_points['data']['y'][saccade_onset_index],
        ])
        mean_speed = saccade_displacement / saccade_time  # pixels per microsecond
        mean_speed /= ppd  # degrees per microsecond
        mean_speed *= 1e6  # degrees per second
        if mean_speed < self.THRESHOLD_ONSET_SLOW_DEGREE_PER_SEC:
            # Saccades where the average velocity drops below the slow onset threshold are also
            # discarded (those are often due to some high-velocity samples going in one
            # direction, then jumping back - which is unbiological).
            if self.verbose == 2:
                print >> sys.stderr, 'Discarding due to low average speed: needed {}, had {}'.format(
                    self.THRESHOLD_ONSET_SLOW_DEGREE_PER_SEC, mean_speed)
            continue

        # If all is okay, we have detected a whole saccade
        gaze_points['data']['EYE_MOVEMENT_TYPE'][saccade_onset_index:saccade_offset_index + 1] = 'SACCADE'
        # write the saccade index into the appropriate field and update the global count
        gaze_points['data']['SACC_INTERVAL_INDEX'][saccade_onset_index:saccade_offset_index + 1] = \
            detected_saccades_count
        detected_saccades_count += 1
        # from the end of the last saccade till the beginning of this one, put the appropriate
        # intersaccadic interval index; also update the global count of intersaccadic intervals
        gaze_points['data']['INTERSACC_INTERVAL_INDEX'][last_saccade_end + 1:saccade_onset_index] = \
            intersaccadic_intervals_count
        intersaccadic_intervals_count += 1
        last_saccade_end = saccade_offset_index

        if self.verbose:
            print >> sys.stderr, '{0} {1:0.1f} {2:0.1f} {3} {4:0.1f} {5:0.1f}'.format(
                gaze_points['data'][saccade_onset_index]['time'],
                gaze_points['data'][saccade_onset_index]['x'],
                gaze_points['data'][saccade_onset_index]['y'],
                gaze_points['data'][saccade_offset_index]['time'],
                gaze_points['data'][saccade_offset_index]['x'],
                gaze_points['data'][saccade_offset_index]['y'],
            )

    # final intersaccadic interval, if there is one
    gaze_points['data']['INTERSACC_INTERVAL_INDEX'][last_saccade_end + 1:] = \
        intersaccadic_intervals_count
    intersaccadic_intervals_count += 1

    # Override erroneous samples' labels
    gaze_points['data']['EYE_MOVEMENT_TYPE'][is_glitch] = 'NOISE'
    return gaze_points
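# A hedged post-processing sketch for the saccade detector above (not part of the original
# code): once detect() has filled the SACC_INTERVAL_INDEX column (-1 means "not part of any
# saccade"), individual saccades can be pulled out and summarised. `saccade_detector` is a
# hypothetical instance of the class that owns detect(); timestamps are assumed to be in
# microseconds, as in the code above; np refers to the module-level numpy import.
def _example_summarise_saccades(saccade_detector, gaze_points):
    labelled = saccade_detector.detect(gaze_points, inplace=False)
    sacc_ids = labelled['data']['SACC_INTERVAL_INDEX']
    durations_ms = []
    for sacc_id in np.unique(sacc_ids[sacc_ids >= 0]):
        sacc = labelled['data'][sacc_ids == sacc_id]
        durations_ms.append((sacc['time'][-1] - sacc['time'][0]) * 1e-3)
    return durations_ms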
def detect(self, gaze_points, inplace=False):
    """
    Identify and label fixation intervals as 'FIX' and some others as 'NOISE'.

    Fixation identification includes the following steps:

    - First, all intersaccadic intervals with a dispersion below a certain spread threshold
      (@self.PREFILTERING_INTERVAL_SPREAD_THRESHOLD_DEGREES) are marked as fixations.
    - Then, a temporal window (@self.SLIDING_WINDOW_WIDTH_MICROSEC microseconds wide) is shifted
      across the remaining data, and a non-fixation onset (offset) is marked every time the speed
      rises above (falls below) the threshold (@self.SPEED_THRESHOLD_DEGREES_PER_SEC).
    - There are two criteria for this speed check, 'speed' and 'spread':
      - 'speed': the speed from the start point to the end point of the window exceeds the threshold;
      - 'spread': the maximum extent of either x or y within the window, divided by the window
        duration, exceeds the threshold.
      Data with speed below the threshold are labelled as 'FIX'.
    - Finally, non-fixation episodes longer than @self.MINIMAL_SP_DURATION_MICROSEC are kept as
      'UNKNOWN', the shorter ones are labelled as 'NOISE' (these are fairly dynamic episodes that
      nevertheless should not be SP).

    :param gaze_points: arff object with saccades detected (and intersaccadic intervals labelled)
    :param inplace: whether to replace the data inside @gaze_points or create a new structure
    :return: arff object with data labelled as 'FIX' and 'NOISE'; some 'UNKNOWN' labels are kept
             for the next stage.
    """
    if not inplace:
        gaze_points = copy.deepcopy(gaze_points)

    # add a global index column (to keep track of where we are even if working within an
    # intersaccadic interval)
    gaze_points = ArffHelper.add_column(gaze_points,
                                        name='global_index',
                                        dtype='INTEGER',
                                        default_value=-1)
    gaze_points['data']['global_index'] = np.arange(gaze_points['data'].shape[0])

    # I. First step of fixation removal: rough prefiltering
    #
    # Convert the degree-based constants to pixels (per second, where applicable)
    ppd = util.calculate_ppd(gaze_points)
    speed_thd = ppd * self.SPEED_THRESHOLD_DEGREES_PER_SEC
    prefiltering_spread_thd = ppd * self.PREFILTERING_INTERVAL_SPREAD_THRESHOLD_DEGREES

    # record the indices of those intersaccadic intervals that are not labelled as FIX
    # by the prefiltering
    unknown_interval_index = []
    unknown_interval_masks = []
    for i in xrange(max(gaze_points['data']['INTERSACC_INTERVAL_INDEX']) + 1):
        mask = gaze_points['data']['INTERSACC_INTERVAL_INDEX'] == i
        intersacc_interval = gaze_points['data'][mask]
        if len(intersacc_interval) == 0:
            continue

        dispersion = [
            max(intersacc_interval['x']) - min(intersacc_interval['x']),
            max(intersacc_interval['y']) - min(intersacc_interval['y'])
        ]
        if any(d >= prefiltering_spread_thd for d in dispersion):
            unknown_interval_index.append(i)  # keep unknown
            unknown_interval_masks.append(mask.copy())  # cache the indexing
        else:
            gaze_points['data']['EYE_MOVEMENT_TYPE'][mask] = 'FIX'

    # II. Second step of fixation removal: finer prefiltering
    #
    for i, interval_mask in zip(unknown_interval_index, unknown_interval_masks):
        # We record the borders of the non-FIX episodes to validate their duration. If a non-FIX
        # episode is very short, we mark it as NOISE (not enough duration for a smooth pursuit
        # candidate).
        onset_timestamp = None
        onset_index = None

        intersacc_interval = gaze_points['data'][interval_mask]
        intersacc_interval = util.get_xy_moving_average(
            intersacc_interval,
            self.NORMALIZATION_SLIDING_WINDOW_SIZE_SAMPLES,
            inplace=False)

        # for intervals shorter than @self.INTERSACCADIC_INTERVAL_DURATION_THRESHOLD_MICROSEC:
        # cannot do further filtering, the label remains 'UNKNOWN'
        if intersacc_interval['time'][-1] - intersacc_interval['time'][0] < \
                self.INTERSACCADIC_INTERVAL_DURATION_THRESHOLD_MICROSEC:
            continue
        # for longer intervals: do further pre-filtering;
        # label data as 'FIX' or 'NOISE', or keep 'UNKNOWN'
        else:
            # the window is shifted by 1 sample every time
            for index, item in enumerate(intersacc_interval):
                x_start = item['x']
                y_start = item['y']
                shift_window_interval = intersacc_interval[
                    (intersacc_interval['time'] >= item['time']) *
                    (intersacc_interval['time'] <= item['time'] + self.SLIDING_WINDOW_WIDTH_MICROSEC)]

                # if the distance between the current sample and the end of the interval is shorter
                # than self.SLIDING_WINDOW_WIDTH_MICROSEC (i.e. the end of the window matches the end
                # of the intersaccadic interval), we keep the previous label if it was FIX,
                # otherwise keep UNKNOWN
                if shift_window_interval['time'][-1] == intersacc_interval['time'][-1]:
                    if intersacc_interval['EYE_MOVEMENT_TYPE'][index - 1] == 'FIX':
                        gaze_points['data']['EYE_MOVEMENT_TYPE'][
                            gaze_points['data']['time'] == item['time']] = 'FIX'
                        # we do not keep track of the non-fixation interval anymore since it will be
                        # all fixation until the end of the intersaccadic interval
                        onset_timestamp = None
                        onset_index = None
                    else:
                        # a new non-fixation interval is starting
                        onset_timestamp = item['time']
                        onset_index = item['global_index']
                # if the distance between the current sample and the end of the interval is larger
                # than the window size, continue with the normal processing
                else:
                    # get the window duration in seconds
                    period = (shift_window_interval['time'][-1] - shift_window_interval['time'][0]) * 1e-6
                    # is the fixation criterion satisfied?
                    fixation_flag = True
                    if self.SLIDING_WINDOW_CRITERION == 'speed':
                        # if the speed from the window start to the window end exceeds the speed
                        # threshold -- mark as onset (UNKNOWN/NOISE), otherwise -- as offset (FIX)
                        x_end = shift_window_interval['x'][-1]
                        y_end = shift_window_interval['y'][-1]
                        if math.sqrt((x_start - x_end) ** 2 +
                                     (y_start - y_end) ** 2) >= speed_thd * period:
                            # will not be a fixation
                            fixation_flag = False
                    else:  # 'spread'
                        # if either x_max - x_min or y_max - y_min exceeds the speed threshold times
                        # the window duration -- mark as onset, otherwise -- as offset
                        x_max = max(shift_window_interval['x'])
                        x_min = min(shift_window_interval['x'])
                        y_max = max(shift_window_interval['y'])
                        y_min = min(shift_window_interval['y'])
                        if max(x_max - x_min, y_max - y_min) >= speed_thd * period:
                            # will not be a fixation
                            fixation_flag = False

                    if fixation_flag:
                        gaze_points['data']['EYE_MOVEMENT_TYPE'][item['global_index']] = 'FIX'

                    # either a fixation start or the end of the whole interval
                    if fixation_flag or index == len(intersacc_interval) - 1:
                        # if we had a non-fixation interval going on before, check its duration
                        if onset_index is not None:
                            # non-fixation episode longer than @self.MINIMAL_SP_DURATION_MICROSEC:
                            # keep UNKNOWN; otherwise label it as NOISE
                            if item['time'] - onset_timestamp < self.MINIMAL_SP_DURATION_MICROSEC:
                                offset_timestamp = item['time'] - 1
                                offset_index = item['global_index'] - 1
                                # if this is not the beginning of a fixation,
                                # the last item should also be labelled as NOISE
                                if not fixation_flag:
                                    offset_timestamp += 1
                                    offset_index += 1
                                gaze_points['data'][onset_index:(offset_index + 1)]['EYE_MOVEMENT_TYPE'] = 'NOISE'
                            # the episode is finished
                            onset_timestamp = None
                            onset_index = None
                    else:
                        # a new non-fixation interval has started
                        if onset_timestamp is None:
                            onset_timestamp = item['time']
                            onset_index = item['global_index']
                        # otherwise it just continues, we don't have to do anything

    # can now remove the global_index column
    gaze_points = ArffHelper.remove_column(gaze_points, 'global_index')
    return gaze_points
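# A minimal sketch of the two sliding-window criteria described in the docstring above
# ('speed' vs. 'spread'), extracted for clarity (not part of the original code). `window` is
# assumed to be a structured-array slice with 'time' (microseconds), 'x' and 'y' fields, and
# `speed_thd_px_per_sec` the pixel-converted speed threshold; both names are illustrative.
def _window_is_fixation(window, speed_thd_px_per_sec, criterion='speed'):
    period_sec = (window['time'][-1] - window['time'][0]) * 1e-6
    if criterion == 'speed':
        # displacement from the first to the last sample of the window
        distance = math.sqrt((window['x'][-1] - window['x'][0]) ** 2 +
                             (window['y'][-1] - window['y'][0]) ** 2)
    else:  # 'spread'
        # maximum extent of either coordinate within the window
        distance = max(window['x'].max() - window['x'].min(),
                       window['y'].max() - window['y'].min())
    return distance < speed_thd_px_per_sec * period_sec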