def test_add_column(self):
    name = 'EM_type'
    dtype = ['UNKNOWN', 'FIX', 'SACCADE', 'SP', 'NOISE']
    a = ArffHelper.load(open('test_data/arff_data_example.arff'))
    # add_column() modifies its argument in place (cf. its other usages in this
    # codebase), so add to a deep copy to keep the comparison below meaningful
    b = ArffHelper.add_column(copy.deepcopy(a), name, dtype, 'UNKNOWN')
    a['attributes'].append((name, dtype))
    self.assertEqual(a['attributes'], b['attributes'])
def _aggregate_data(self, gaze_points_list):
    """
    Aggregate data from @DATA of all arff objects in the input list into a new data set
    in the form of a numpy array.

    :param gaze_points_list: gaze data to be clustered, in the form of a list of arff objects.
    :return: data set to be clustered in the form of a 7-column numpy array, i.e.
             ['time', 'x', 'y', 'global_index', 'observer_id', 'CLUSTER_ID', 'visited_flag'],
             ordered by the 'time' column value.
    """
    data_set = []
    for i in range(len(gaze_points_list)):
        gaze_points_data = gaze_points_list[i]['data'][(
            gaze_points_list[i]['data']['EYE_MOVEMENT_TYPE'] == 'UNKNOWN'
        )][['time', 'x', 'y', 'global_index']]
        gaze_points_data = ArffHelper.add_column_to_array(
            gaze_points_data, 'observer_id', 'NUMERIC',
            gaze_points_list[i]['metadata']['observer_id'])
        gaze_points_data = ArffHelper.add_column_to_array(
            gaze_points_data, 'CLUSTER_ID', 'NUMERIC', -1)
        gaze_points_data = ArffHelper.add_column_to_array(
            gaze_points_data, 'visited_flag', 'NUMERIC', 0)
        if len(gaze_points_data) > 0:
            data_set.append(gaze_points_data)
    data_set = np.concatenate(data_set)
    data_set = np.sort(data_set, order='time')
    return data_set
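# For illustration: a rough plain-numpy equivalent of what add_column_to_array
# achieves above (a sketch, not the actual ArffHelper implementation). It appends
# a constant-valued field to a structured array and keeps the result sortable
# by 'time', as in _aggregate_data.
import numpy as np
from numpy.lib import recfunctions

base = np.array([(2, 0.5, 0.5), (1, 0.0, 0.0)],
                dtype=[('time', np.int64), ('x', np.float32), ('y', np.float32)])
with_id = recfunctions.append_fields(base, 'observer_id',
                                     np.full(base.shape[0], 7.0), usemask=False)
with_id = np.sort(with_id, order='time')  # same ordering step as above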
def wrapper(*args, **kwargs):
    output_arff_fname = inspect.getcallargs(func, *args, **kwargs).get('output_arff_fname', None)
    arff_obj = func(*args, **kwargs)
    if output_arff_fname is not None:
        with open(output_arff_fname, 'w') as arff_out:
            ArffHelper.dump(arff_obj, arff_out)
    return arff_obj
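# The function above closes over @func and ArffHelper, so it is presumably the inner
# part of a decorator. A minimal sketch of such an enclosing decorator (the decorator
# name here is illustrative, not from this codebase):
import functools
import inspect

def dump_arff_output(func):
    """If the decorated loader was given 'output_arff_fname', dump its result there."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        output_arff_fname = inspect.getcallargs(func, *args, **kwargs).get('output_arff_fname', None)
        arff_obj = func(*args, **kwargs)
        if output_arff_fname is not None:
            with open(output_arff_fname, 'w') as arff_out:
                ArffHelper.dump(arff_obj, arff_out)
        return arff_obj
    return wrapper

# Usage would then look like:
# @dump_arff_output
# def load_some_format_as_arff_object(fname, output_arff_fname=None):
#     ...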
def add_eye_movement_attribute(arff_object):
    """
    Add the EYE_MOVEMENT_TYPE attribute to the @arff_object. If it is already present, do nothing.

    :param arff_object: arff object
    :return: arff object with an added column for the eye movement type
    """
    from recording_processor import EM_TYPE_ATTRIBUTE_NAME, EM_TYPE_ARFF_DATA_TYPE, EM_TYPE_DEFAULT_VALUE
    if 'EYE_MOVEMENT_TYPE' not in arff_object['data'].dtype.names:
        ArffHelper.add_column(arff_object, EM_TYPE_ATTRIBUTE_NAME, EM_TYPE_ARFF_DATA_TYPE, EM_TYPE_DEFAULT_VALUE)
    return arff_object
def cluster(self, gaze_points_list, inplace=False):
    """
    Find clusters in the input gaze data and label the clustered points as smooth pursuit.
    Labels (i.e. sets the 'EYE_MOVEMENT_TYPE' field of) the clustered data points as 'SP',
    all other samples as 'NOISE_CLUSTER'. A new column 'CLUSTER_ID' is added to the @DATA
    section of each arff object in @gaze_points_list, indicating the cluster group ID.

    :param gaze_points_list: a list of arff objects (dictionaries with fields such as 'data' and 'metadata')
    :param inplace: whether to modify the original input gaze data, or to work on a copy
    :return: gaze data after clustering, in the same form as the input data.
    """
    if not inplace:
        gaze_points_list = copy.deepcopy(gaze_points_list)
    # add global indexing to be able to reference a particular sample even after clustering all in one structure
    for ind in xrange(len(gaze_points_list)):
        ArffHelper.add_column(gaze_points_list[ind], name='global_index', dtype='INTEGER', default_value=-1)
        gaze_points_list[ind]['data']['global_index'] = np.arange(gaze_points_list[ind]['data'].shape[0])

    self._setup_internal_parameters(gaze_points_list)
    self._data_set = self._aggregate_data(gaze_points_list)
    # has to be a copy, so that it is placed contiguously in memory
    self._timestamps = self._data_set['time'].copy()

    current_cluster_id = 0
    for i in xrange(len(self._data_set)):
        if self._data_set[i]['visited_flag'] == 1:
            continue
        else:
            self._data_set[i]['visited_flag'] = 1
            neighbourhood = self._get_neighbourhood(i)
            if self._validate_neighbourhood(neighbourhood):
                self._expand_cluster(i, neighbourhood, current_cluster_id)
                current_cluster_id += 1
            # if the neighbourhood is not valid, the current point is effectively
            # marked as NOISE: its CLUSTER_ID stays -1

    # create a new column in gaze_points_list for CLUSTER_ID
    for i in xrange(len(gaze_points_list)):
        ArffHelper.add_column(gaze_points_list[i], 'CLUSTER_ID', 'NUMERIC', -1)

    # label data in gaze_points_list as SP according to CLUSTER_ID
    for i in xrange(len(self._data_set)):
        observer_id = int(self._data_set[i]['observer_id'])
        global_index = self._data_set[i]['global_index']
        if self._data_set[i]['CLUSTER_ID'] != -1:
            gaze_points_list[observer_id]['data']['EYE_MOVEMENT_TYPE'][global_index] = 'SP'
            gaze_points_list[observer_id]['data']['CLUSTER_ID'][global_index] = self._data_set[i]['CLUSTER_ID']
        else:
            gaze_points_list[observer_id]['data']['EYE_MOVEMENT_TYPE'][global_index] = 'NOISE_CLUSTER'

    # can now remove the global_index column
    for ind in xrange(len(gaze_points_list)):
        ArffHelper.remove_column(gaze_points_list[ind], name='global_index')
    return gaze_points_list
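# The visited_flag / _get_neighbourhood / _expand_cluster loop above mirrors the
# classic DBSCAN control flow. A self-contained sketch of that scheme on plain 2-D
# points (illustrative only; the neighbourhood and cluster-expansion logic of this
# codebase lives in the private methods used above and may differ):
import numpy as np

def dbscan_sketch(points, eps, min_pts):
    n = len(points)
    visited = np.zeros(n, dtype=bool)
    cluster_id = np.full(n, -1, dtype=int)
    current = 0

    def neighbours(i):
        return np.nonzero(np.linalg.norm(points - points[i], axis=1) <= eps)[0]

    for i in range(n):
        if visited[i]:
            continue
        visited[i] = True
        seeds = list(neighbours(i))
        if len(seeds) < min_pts:
            continue  # stays noise unless absorbed by a later cluster
        cluster_id[i] = current
        while seeds:
            j = seeds.pop()
            if not visited[j]:
                visited[j] = True
                j_neighbours = neighbours(j)
                if len(j_neighbours) >= min_pts:
                    seeds.extend(j_neighbours)
            if cluster_id[j] == -1:
                cluster_id[j] = current
        current += 1
    return cluster_id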
def test_dump(self):
    a = ArffHelper.load(open('test_data/arff_data_example.arff'))
    # close the returned file handle after the dump has been completed
    ArffHelper.dump(a, open('test_data/test_dump.arff', 'w')).close()
    b = arff.load(open('test_data/test_dump.arff'))
    c = arff.load(open('test_data/arff_data_example.arff'))

    del a['metadata']
    del a['description']
    del b['description']
    del c['description']

    np.testing.assert_almost_equal(a['data'].tolist(), b['data'], 2)
    np.testing.assert_almost_equal(a['data'].tolist(), c['data'], 2)
    del a['data']
    del b['data']
    del c['data']

    self.assertEqual(a, b)
    self.assertEqual(a, c)
    os.remove('test_data/test_dump.arff')
def test_load(self):
    a = ArffHelper.load(open('test_data/arff_data_example.arff'))
    self.assertEqual(a['metadata']['distance_mm'], 450.0)
    del a['metadata']

    b = arff.load(open('test_data/arff_data_example.arff'))
    del a['description']
    del b['description']

    np.testing.assert_almost_equal(a['data'].tolist(), b['data'], 2)
    # tested data equality, can now delete it
    del a['data']
    del b['data']
    self.assertEqual(a, b)
def load_ARFF_as_arff_object(fname, eye_movement_type_attribute=None, eye_movement_type_mapping_dict=None):
    """
    Load data from the ARFF file format (with %@METADATA comments possible, see [1]).
    We expect and verify that the arff file in question has the columns 'time', 'x' and 'y'
    (for the timestamp, and the x and y positions of the gaze point, respectively).

    [1] http://ieeexplore.ieee.org/abstract/document/7851169/

    :param fname: name of the .arff file
    :param eye_movement_type_attribute: the attribute that should be treated as an indication
                                        of eye movement type, optional;
                                        should be either a string (name of the attribute), or True,
                                        in which case it is substituted by the 'EYE_MOVEMENT_TYPE' string
    :param eye_movement_type_mapping_dict: a dictionary that is used to convert values in the column
                                           @eye_movement_type_attribute to values in the following set:
                                           ['UNKNOWN', 'FIX', 'SACCADE', 'SP', 'NOISE', 'BLINK', 'NOISE_CLUSTER']
                                           (as defined by recording_processor.py)
    :return: an arff object
    """
    # EM_VALUE_MAPPING_DEFAULT is the inverse of the dictionary used in evaluation.py.
    # It can be used (with @eye_movement_type_mapping_dict='default'), for instance, to load files where
    # different eye movements are labelled by numerical values rather than by categorical ones (i.e. strings),
    # due to the arff file implementation in the framework that produced the labels, or some other reason.
    # These values correspond to the ones used in our hand-labelling tool [1].
    arff_obj = ArffHelper.load(open(fname))
    # validate that we have all the essential data
    assert all([attr in arff_obj['data'].dtype.names for attr in ['time', 'x', 'y']]), \
        'File {} must contain at least "time", "x" and "y" columns'.format(fname)

    if eye_movement_type_attribute is not None:
        from recording_processor import EM_TYPE_ARFF_DATA_TYPE, EM_TYPE_ATTRIBUTE_NAME
        if eye_movement_type_attribute is True:
            eye_movement_type_attribute = EM_TYPE_ATTRIBUTE_NAME
        assert eye_movement_type_attribute in arff_obj['data'].dtype.names, \
            'Attribute {} is not present in the arff structure from file {}'.format(eye_movement_type_attribute,
                                                                                    fname)
        # add the dedicated eye movement type column
        arff_obj = util.add_eye_movement_attribute(arff_obj)

        if eye_movement_type_mapping_dict is None:
            # Check whether the column is already of the right format.
            # Only need to do this if the attribute is numerical, not categorical!
            if arff_obj['data'][eye_movement_type_attribute].dtype.type is not np.string_:
                correct_flag = all([item in EM_TYPE_ARFF_DATA_TYPE
                                    for item in arff_obj['data'][eye_movement_type_attribute]])
            else:
                # nothing to do here, already a categorical attribute
                correct_flag = True

            if correct_flag:
                # already the perfect values in the respective column, just put the same values
                # in the special column
                arff_obj['data']['EYE_MOVEMENT_TYPE'] = arff_obj['data'][eye_movement_type_attribute]
                return arff_obj
            else:
                # if None, act as 'default', if needed
                eye_movement_type_mapping_dict = EM_VALUE_MAPPING_DEFAULT
        elif eye_movement_type_mapping_dict == 'default':
            eye_movement_type_mapping_dict = EM_VALUE_MAPPING_DEFAULT

        assert isinstance(eye_movement_type_mapping_dict, dict), \
            'Argument @eye_movement_type_mapping_dict must be either a dict, or None, or a string "default"'
        assert all([v in EM_TYPE_ARFF_DATA_TYPE for v in eye_movement_type_mapping_dict.values()]), \
            'All the values of the provided dictionary must be one of the following: {}'.format(
                EM_TYPE_ARFF_DATA_TYPE)
        # now map using the dictionary
        original_values = arff_obj['data'][eye_movement_type_attribute]
        mapped_values = [eye_movement_type_mapping_dict[item] for item in original_values]
        arff_obj['data']['EYE_MOVEMENT_TYPE'] = mapped_values

    arff_obj['metadata']['filename'] = fname
    return arff_obj
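# Example usage (a sketch: the file name, the attribute name and the numerical
# mapping below are illustrative, not taken from this codebase):
obj = load_ARFF_as_arff_object('recording.arff',
                               eye_movement_type_attribute='handlabeller_1',
                               eye_movement_type_mapping_dict={0: 'UNKNOWN',
                                                               1: 'FIX',
                                                               2: 'SACCADE',
                                                               3: 'SP',
                                                               4: 'NOISE'})
print obj['data']['EYE_MOVEMENT_TYPE'][:10]  # now holds the categorical labels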
def load_DSF_coord_as_arff_object(fname, output_arff_fname=None):
    """
    Load data from the given input .coord file and return an arff object.

    This is a "model" function for writing new data adapters. To create a similarly-functioning
    method, one would need to parse the file under @fname to extract an arff object
    (a dictionary with special keys) of the following structure:

        arff_obj = {
            'relation': 'gaze_recording',
            'description': '',
            'data': [],
            'metadata': {},
            'attributes': [('time', 'INTEGER'),
                           ('x', 'NUMERIC'),
                           ('y', 'NUMERIC'),
                           ('confidence', 'NUMERIC')]}

    and fill in its fields.

    'data' should first contain a list of lists (the latter lists should be of the same length
    as 'attributes').
    'description' is just a string that gets put into the beginning of the file.
    'metadata' is a dictionary, where the following keys are needed later on:
      - "width_px", "height_px" - pixel dimensions of the video
      - "width_mm", "height_mm" - physical dimensions of the video (in millimeters)
      - "distance_mm" - distance between the observer's eyes and the monitor (in millimeters)
    'attributes' (if additional ones are required) is a list of tuples, each consisting of 2 elements:
      - attribute name
      - attribute type, which can be INTEGER (=int64), NUMERIC (=float32), REAL (=double), or a list
        of strings, which means it is a categorical attribute and only these values are accepted.

    After 'data' is filled with the appropriate lists of values, call

        arff_obj = ArffHelper.convert_data_to_structured_array(arff_obj)

    to (unsurprisingly) convert the data in @arff_obj['data'] into a structured numpy array
    for easier data access.

    :param fname: name of the .coord file.
    :param output_arff_fname: if desired, this function can also convert the input .coord file
                              into an .arff file that can be further used within this framework as well.
    :return: an arff object with the keywords "@RELATION, @DESCRIPTION, @DATA, @METADATA, @ATTRIBUTES".
    """
    load_DSF_coord_as_arff_object.COMMENT_PREFIX = '#'
    # the 'gaze ... ...' line has this many "fields" (defines the video resolution)
    load_DSF_coord_as_arff_object.GAZE_FORMAT_FIELD_COUNT = 3
    # Samples are in lines that look like <timestamp> <x> <y> <confidence>.
    # In case of binocular tracking, these are the mean coordinates of the two eyes anyway.
    load_DSF_coord_as_arff_object.GAZE_SAMPLE_FIELDS = 4

    if not os.path.isfile(fname):
        raise ValueError("No such .coord file named '{}' or incorrect input format of file name".format(fname))

    arff_obj = {
        'relation': 'gaze_recording',
        'description': [],
        'data': [],
        'metadata': OrderedDict(),
        'attributes': [('time', 'INTEGER'),
                       ('x', 'NUMERIC'),
                       ('y', 'NUMERIC'),
                       ('confidence', 'NUMERIC')]}
    description = []

    for line in open(fname):
        line = line.rstrip('\n ')
        if line.startswith(load_DSF_coord_as_arff_object.COMMENT_PREFIX):
            description.append(line[len(load_DSF_coord_as_arff_object.COMMENT_PREFIX):])
            continue
        try:
            ll = line.split()
            # cut out the first needed values (t, x, y, confidence), even for a binocular tracking .coord file
            ll = map(float, ll)[:load_DSF_coord_as_arff_object.GAZE_SAMPLE_FIELDS]
            arff_obj['data'].append(ll)
        except ValueError:
            if line.startswith('gaze'):
                words = line.split()
                # This line should have the following format:
                # gaze <video width in pixels> <video height in pixels>
                if len(words) == load_DSF_coord_as_arff_object.GAZE_FORMAT_FIELD_COUNT:
                    arff_obj['metadata']['width_px'] = float(words[1])
                    arff_obj['metadata']['height_px'] = float(words[2])
                else:
                    raise ValueError("Incorrect gaze data format in file {}. "
                                     "Correct format should be 'gaze <width_in_pixels> <height_in_pixels>'".
                                     format(fname))
            elif line.startswith('geometry'):
                words = line.split()
                # This line should have the following format:
                # geometry <property_name_1> <value in meters> <property_name_2> <value in meters> ...
                # So we treat every second field as a property name or a value in meters, respectively.
                # We convert the values to mm.
                for i in xrange(1, len(words), 2):
                    key_mm = '{}_mm'.format(words[i])
                    value_mm = float(words[i + 1]) * 1e3
                    arff_obj['metadata'][key_mm] = value_mm
            continue

    arff_obj['metadata']['filename'] = fname
    arff_obj['description'] = '\n'.join(description)
    arff_obj = ArffHelper.convert_data_to_structured_array(arff_obj)
    return arff_obj
def run_detection(params):
    """
    Run the entire detection pipeline with the given parameters.

    :param params: a two-level dictionary (just like create_parameters_from_args() would return).
                   The only required parameter is @params['GeneralArguments']['input_folder'], which should
                   point to a folder with raw gaze data. The data is assumed to be stored in the following way:
                     (1) for each movie (clip) there should be a separate subdirectory in the input_folder;
                     (2) inside these subdirectories, all the files with the extension of
                         @params['GeneralArguments']['gaze_extension'] (.coord by default) represent
                         a recording for one observer each.
                   If your data does not get loaded, maybe the appropriate data loader does not get called.
                   You can fix this (provided that a suitable data loader exists in data_loaders.py) by setting
                   @params['GeneralArguments']['input_data_type'] to the correct value
                   (for the correspondence, see the keys of RecordingProcessor._format_loaders).

                   To summarize, a minimalistic input to run detection with default parameters on your dataset
                   (let's assume you have converted the data to the .arff format) would be:
                   run_detection({'GeneralArguments': {'input_folder': 'PATH/TO/YOUR/DATA/FOLDER',
                                                       'gaze_extension': '.arff'}})
    :return: path to the results folder
    """
    # make a defaultdict out of @params so that we can always access its first-level keys
    params_default_first_level = defaultdict(dict)
    params_default_first_level.update(params)
    params = params_default_first_level

    verbose = params['GeneralArguments'].get('verbose', False)

    out_folder = params['GeneralArguments'].get('output_folder')
    if out_folder is None:
        out_folder = tempfile.mkdtemp(prefix='sp_tool_')
        warnings.warn('No output folder provided, using {}'.format(out_folder))
    if verbose:
        print >> sys.stderr, 'Outputs will be written to folder', out_folder

    saccade_detector = SaccadeDetector(**params['SaccadeDetector'])
    blink_detector = BlinkDetector(**params['BlinkDetector'])
    fixation_detector = FixationDetector(**params['FixationDetector'])

    recording_processor = RecordingProcessor(saccade_detector=saccade_detector,
                                             blink_detector=blink_detector,
                                             fixation_detector=fixation_detector)

    sp_detector = SmoothPursuitDetector(**params['SmoothPursuitDetector'])

    # The next lines deal with identifying the names of the video clips used for the eye tracking experiment.
    # They can be initialized in various ways; here we just get all video paths by a glob pattern
    # and cut off everything that is not needed.
    in_folder = params['GeneralArguments'].get('input_folder')
    if not in_folder:
        raise ValueError('\'input_folder\' is a required parameter of the \'GeneralArguments\' group in @params!')
    folder_names = sorted(glob.glob('{}/*/'.format(in_folder)))  # getting all the subfolders of the input folder
    if not folder_names and verbose:
        print >> sys.stderr, 'No subfolders found under "{}"'.format(in_folder)
    # extract names from paths
    folder_names = [os.path.splitext(os.path.basename(folder.rstrip('/')))[0] for folder in folder_names]

    movies = params['GeneralArguments'].get('movies')
    if movies:  # not empty, restrict to these folders only
        movies = set(movies)
        folder_names = [fn for fn in folder_names if fn in movies]
    if verbose:
        print >> sys.stderr, 'Working with movies:', folder_names

    # data files extension
    gaze_pattern = params['GeneralArguments'].get('gaze_file_pattern', '*.coord')
    if '*' not in gaze_pattern:
        gaze_pattern = '*' + gaze_pattern

    for movie in folder_names:
        full_out_folder = '{}/{}/'.format(out_folder, movie)
        if not os.path.exists(full_out_folder):
            os.makedirs(full_out_folder)
        if verbose:
            print >> sys.stderr, 'Started processing for {},'.format(movie), \
                'results will appear in', full_out_folder

        # The next lines load the data files of the recording with one particular movie.
        # To do this, here we provide a glob pattern that matches all the .{extension} files
        # in the respective folder.
        gaze_data_files = sorted(glob.glob('{}/{}/{}'.format(in_folder, movie, gaze_pattern)))
        if len(gaze_data_files) == 0:
            print >> sys.stderr, 'Found 0 files with this pattern: "{}". Omitting this directory.'.format(
                '{}/{}/{}'.format(in_folder, movie, gaze_pattern)
            )
            continue

        try:
            # The next line loads the data and labels saccades, blinks and fixations.
            gaze_points_list = recording_processor.load_multiple_recordings(
                gaze_data_files, verbose=verbose, data_format=params['GeneralArguments'].get('input_data_type'))
            # This will label the smooth pursuits
            if verbose:
                print >> sys.stderr, 'Saccades/blinks/fixations are detected, starting SP detection.'
            classified_gaze_points = sp_detector.detect(gaze_points_list)
            # Now just dump the resulting structure into .arff files in the respective
            # subdirectory of @out_folder
            for file_name, arff_data in zip(gaze_data_files, classified_gaze_points):
                output_file_name = os.path.splitext(os.path.basename(file_name))[0]
                ArffHelper.dump(arff_data, open(
                    '{}/{}.arff'.format(full_out_folder, output_file_name), 'w')).close()
        except Exception as e:
            print >> sys.stderr, 'Had to skip {} due to an error "{}"'.format(movie, e.message)
    return out_folder
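# Example invocation (a sketch; the input path is a placeholder, and every omitted
# parameter group falls back to the corresponding detector's defaults):
if __name__ == '__main__':
    results_folder = run_detection({'GeneralArguments': {'input_folder': '/path/to/dataset',
                                                         'gaze_file_pattern': '*.arff',
                                                         'verbose': True}})
    print >> sys.stderr, 'All results are in', results_folder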
def detect(self, gaze_points, inplace=False):
    """
    This method labels saccades (and noise) in the provided gaze_points, which should be an arff object.

    :param gaze_points: gaze recording data, an arff object (i.e. a dictionary with 'data',
                        'metadata' and other keys)
    :param inplace: whether to replace the data inside @gaze_points or create a new structure
    :return: gaze points with added labels SACCADE, NOISE
    """
    if not inplace:
        gaze_points = copy.deepcopy(gaze_points)

    # also keep track of saccadic and intersaccadic intervals
    detected_saccades_count = 0
    if 'SACC_INTERVAL_INDEX' not in gaze_points['data'].dtype.names:
        ArffHelper.add_column(gaze_points, 'SACC_INTERVAL_INDEX', 'INTEGER', -1)
    # a virtual saccade that finished before the recording, for uniform processing
    last_saccade_end = -1

    intersaccadic_intervals_count = 0
    if 'INTERSACC_INTERVAL_INDEX' not in gaze_points['data'].dtype.names:
        ArffHelper.add_column(gaze_points, 'INTERSACC_INTERVAL_INDEX', 'INTEGER', -1)

    # verify that the timestamps are sorted!
    times = gaze_points['data']['time']
    assert all(times[i] <= times[i + 1] for i in xrange(len(times) - 1)), \
        'Timestamps are not sorted in {}'.format(gaze_points['metadata']['filename'])

    # -1 so that the exact value ends up on the right of the searched timestamp
    searchable_timestamps = times - self.VELOCITY_INTEGRAL_INTERVAL_MICROSEC - 1
    # find the indices of the first samples within the velocity integration interval
    prev_indices = np.searchsorted(times, searchable_timestamps, side='right')
    cur_indices = np.arange(len(prev_indices))
    # if the index after the search points at this very data point, take the previous one
    prev_indices[prev_indices == cur_indices] -= 1
    # except for the very first sample
    prev_indices[0] = 0

    # computing velocities
    x_shifts = gaze_points['data']['x'][cur_indices] - gaze_points['data']['x'][prev_indices]
    y_shifts = gaze_points['data']['y'][cur_indices] - gaze_points['data']['y'][prev_indices]
    shifts = np.linalg.norm(np.vstack([x_shifts, y_shifts]), axis=0)
    time_shifts = gaze_points['data']['time'][cur_indices] - gaze_points['data']['time'][prev_indices]
    # keep it above 0, the shifts are 0 there anyway
    time_shifts[time_shifts == 0] += 1
    velocities = shifts / time_shifts  # pixels per microsecond
    ppd = util.calculate_ppd(gaze_points)
    velocities /= ppd  # degrees per microsecond
    velocities *= 1e6  # degrees per second

    # How many samples back is it reasonable to go?
    time_step = np.diff(times).mean()
    # a big margin of error, 10 times as many samples as one would normally need
    extra_samples_count = int(np.round((self.MAX_DURATION_MICROSEC * 10) / time_step))

    # Glitch detection: glitches are defined by one of several features.
    #
    # (1) Coordinates far outside the calibrated region (what constitutes "far" is defined
    # by the tolerance parameter) are assumed to be erroneous.
    is_glitch = np.zeros(gaze_points['data'].shape[0], dtype=np.bool)
    is_glitch[gaze_points['data']['x'] < -gaze_points['metadata']['width_px'] * self.TOLERANCE] = True
    is_glitch[gaze_points['data']['y'] < -gaze_points['metadata']['height_px'] * self.TOLERANCE] = True
    is_glitch[gaze_points['data']['x'] > gaze_points['metadata']['width_px'] * (1 + self.TOLERANCE)] = True
    is_glitch[gaze_points['data']['y'] > gaze_points['metadata']['height_px'] * (1 + self.TOLERANCE)] = True
    # (2) If @gaze_points supports the estimate of a confidence measure for samples,
    # a confidence lower than 0.1 also indicates a glitch here.
    if 'confidence' in gaze_points['data'].dtype.names:
        is_glitch[gaze_points['data']['confidence'] < 0.1] = True
    # (3) Finally, velocities that exceed @self.MAX_SPEED_DEGREE_PER_SEC (default currently
    # set to ~1000 degrees/s) are regarded as glitches as well and labelled as noise.
    is_glitch[velocities > self.MAX_SPEED_DEGREE_PER_SEC] = True
    gaze_points['data']['EYE_MOVEMENT_TYPE'][velocities > self.MAX_SPEED_DEGREE_PER_SEC] = 'NOISE'

    # Remember the first sample after a glitch: to prevent spurious detections right
    # after a glitch, saccade detection is inhibited for that first sample.
    post_glitch = np.diff(is_glitch.astype(int)) == -1
    post_glitch = np.hstack(([False], post_glitch))
    # Remember the last sample before a glitch:
    # since we normally would suspend the other criteria (incl. speed) inside a glitch, we try to avoid
    # border effects in both the next-after and the last-before glitch samples.
    pre_glitch = np.diff(is_glitch.astype(int)) == 1
    pre_glitch = np.hstack((pre_glitch, [False]))
    all_glitch = is_glitch + post_glitch + pre_glitch
    # we will set the glitch samples' labels to NOISE after the saccades have been detected

    # recompute speeds for post-glitch samples
    pre_glitch_indices = np.nonzero(pre_glitch)[0]
    for i in np.nonzero(post_glitch)[0]:
        # find the corresponding start of the glitch
        corresponding_pre_glitch = np.searchsorted(pre_glitch_indices, i) - 1
        if corresponding_pre_glitch < 0:
            # no correspondence found, it's a glitch from the beginning of the recording ==> set velocity to 0
            velocities[i] = 0
        else:
            # found a completed glitch; compute the velocity across it
            # (index into the data through pre_glitch_indices, not with the position
            # in that index list itself)
            pre_glitch_sample = pre_glitch_indices[corresponding_pre_glitch]
            velocities[i] = np.linalg.norm([
                gaze_points['data']['x'][i] - gaze_points['data']['x'][pre_glitch_sample],
                gaze_points['data']['y'][i] - gaze_points['data']['y'][pre_glitch_sample]
            ]) / (times[i] - times[pre_glitch_sample])  # pixels per microsecond
            velocities[i] /= ppd  # degrees per microsecond
            velocities[i] *= 1e6  # degrees per second

    # Looking for saccade seed points.
    # A saccade seed point should
    # (1) exceed the fast threshold
    # (2) be biologically plausible
    # (3) not be inside a glitch
    saccade_seeds = (velocities > self.THRESHOLD_ONSET_FAST_DEGREE_PER_SEC) * \
                    (velocities < self.MAX_SPEED_DEGREE_PER_SEC) * \
                    (1 - all_glitch)
    saccade_seed_indices = np.nonzero(saccade_seeds)[0]
    for potential_seed_index in saccade_seed_indices:
        if gaze_points['data']['EYE_MOVEMENT_TYPE'][potential_seed_index] != 'UNKNOWN':
            # already labelled this before, e.g. as a saccade that started from another seed point
            continue
        if self.verbose == 2:
            print >> sys.stderr, 'potential seed index', potential_seed_index

        # Looking for the onset:
        # (1) should be above the slow threshold speed
        # (2) should not be a glitch
        # (3) does not yet have a label
        onset_candidates_check = \
            (velocities[max(0, potential_seed_index - extra_samples_count):potential_seed_index]
             >= self.THRESHOLD_ONSET_SLOW_DEGREE_PER_SEC) * \
            (1 - is_glitch[max(0, potential_seed_index - extra_samples_count):potential_seed_index]) * \
            (gaze_points['data']['EYE_MOVEMENT_TYPE'][
                max(0, potential_seed_index - extra_samples_count):potential_seed_index] == 'UNKNOWN')
        # find the last zero (the next sample after it is the beginning of the last
        # uninterrupted 1-sequence, i.e. the saccade onset)
        try:
            last_zero_index = np.nonzero(1 - onset_candidates_check)[0][-1]
        except IndexError:
            # not found
            continue
        # shift accordingly
        saccade_onset_index = last_zero_index + 1 + max(0, potential_seed_index - extra_samples_count)
        # also, this should not be a glitch or a post-/pre-glitch sample
        while all_glitch[saccade_onset_index]:
            saccade_onset_index += 1

        # Looking for the offset:
        # (1) should be above the offset speed threshold
        # (2) should not exceed the biologically plausible duration threshold
        # (3) should not yet have a label (i.e. not the NOISE labelled above)
        offset_candidates_check = \
            (velocities[potential_seed_index:potential_seed_index + extra_samples_count]
             >= self.THRESHOLD_OFFSET_DEGREE_PER_SEC) * \
            (times[potential_seed_index:potential_seed_index + extra_samples_count] -
             times[saccade_onset_index] <= self.MAX_DURATION_MICROSEC)
        # we ignore the criteria around a glitch
        offset_candidates_check += is_glitch[potential_seed_index:potential_seed_index + extra_samples_count]
        offset_candidates_check += post_glitch[potential_seed_index:potential_seed_index + extra_samples_count]
        # but there should not yet be a label present, i.e. it's not the NOISE labelled above
        offset_candidates_check *= \
            (gaze_points['data']['EYE_MOVEMENT_TYPE'][
                potential_seed_index:potential_seed_index + extra_samples_count] == 'UNKNOWN')
        # find the first zero (this is the first sample with speed below the threshold,
        # i.e. the saccade offset)
        try:
            saccade_offset_index = np.nonzero(1 - offset_candidates_check)[0][0]
        except IndexError:
            # no offset found
            continue
        # the index was starting at potential_seed_index
        saccade_offset_index += potential_seed_index

        # if we finished inside a glitch, we have reached a biological limit of some sort ==> discard
        if is_glitch[saccade_offset_index]:
            continue

        if self.verbose == 2:
            print >> sys.stderr, 'Found onset/offset indices', saccade_onset_index, saccade_offset_index

        # now validate the saccade parameters
        # (1) it spans at least the minimal necessary interval
        saccade_time = times[saccade_offset_index] - times[saccade_onset_index]
        if saccade_time < self.MIN_DURATION_MICROSEC:
            # If the resulting saccade is shorter than MIN_DURATION_MICROSEC, we assume that
            # we have only encountered some noise impulse and discard this saccade.
            gaze_points['data']['EYE_MOVEMENT_TYPE'][saccade_onset_index:saccade_offset_index + 1] = 'NOISE'
            if self.verbose == 2:
                print >> sys.stderr, 'Discarding due to low duration: needed {}, had {}'.\
                    format(self.MIN_DURATION_MICROSEC, saccade_time)
            continue
        # (2) the mean velocity is not below the slow onset threshold
        saccade_displacement = np.linalg.norm([
            gaze_points['data']['x'][saccade_offset_index] - gaze_points['data']['x'][saccade_onset_index],
            gaze_points['data']['y'][saccade_offset_index] - gaze_points['data']['y'][saccade_onset_index],
        ])
        mean_speed = saccade_displacement / saccade_time  # pixels per microsecond
        mean_speed /= ppd  # degrees per microsecond
        mean_speed *= 1e6  # degrees per second
        if mean_speed < self.THRESHOLD_ONSET_SLOW_DEGREE_PER_SEC:
            # Saccades where the average velocity drops below the slow onset threshold
            # are also discarded (those are often due to some high-velocity samples going
            # in one direction, then jumping back, which is unbiological).
            if self.verbose == 2:
                print >> sys.stderr, 'Discarding due to low average speed: needed {}, had {}'.format(
                    self.THRESHOLD_ONSET_SLOW_DEGREE_PER_SEC, mean_speed)
            continue

        # If all is okay, we have detected a whole saccade
        gaze_points['data']['EYE_MOVEMENT_TYPE'][saccade_onset_index:saccade_offset_index + 1] = 'SACCADE'
        # write the saccade index into the appropriate field and update the global count
        gaze_points['data']['SACC_INTERVAL_INDEX'][saccade_onset_index:saccade_offset_index + 1] = \
            detected_saccades_count
        detected_saccades_count += 1

        # from the end of the last saccade till the beginning of this one, put the appropriate
        # intersaccadic interval index; also update the global count of intersaccadic intervals
        gaze_points['data']['INTERSACC_INTERVAL_INDEX'][last_saccade_end + 1:saccade_onset_index] = \
            intersaccadic_intervals_count
        intersaccadic_intervals_count += 1
        last_saccade_end = saccade_offset_index

        if self.verbose:
            print >> sys.stderr, '{0} {1:0.1f} {2:0.1f} {3} {4:0.1f} {5:0.1f}'.format(
                gaze_points['data'][saccade_onset_index]['time'],
                gaze_points['data'][saccade_onset_index]['x'],
                gaze_points['data'][saccade_onset_index]['y'],
                gaze_points['data'][saccade_offset_index]['time'],
                gaze_points['data'][saccade_offset_index]['x'],
                gaze_points['data'][saccade_offset_index]['y'],
            )

    # final intersaccadic interval, if there is one
    gaze_points['data']['INTERSACC_INTERVAL_INDEX'][last_saccade_end + 1:] = \
        intersaccadic_intervals_count
    intersaccadic_intervals_count += 1

    # Override erroneous samples' labels
    gaze_points['data']['EYE_MOVEMENT_TYPE'][is_glitch] = 'NOISE'
    return gaze_points
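# A standalone sketch of the windowed velocity computation used above: for every
# sample, np.searchsorted finds the earliest sample within the integration interval,
# and the velocity is the displacement across that window (all numbers below are
# made up for illustration):
import numpy as np

times = np.array([0, 4000, 8000, 12000, 16000])      # microseconds
xs = np.array([10.0, 10.5, 30.0, 30.2, 30.1])        # pixels
window = 8000                                        # integration interval, microseconds

prev_idx = np.searchsorted(times, times - window - 1, side='right')
cur_idx = np.arange(len(times))
prev_idx[prev_idx == cur_idx] -= 1                   # never compare a sample with itself
prev_idx[0] = 0

dt = np.maximum(times[cur_idx] - times[prev_idx], 1)  # avoid division by zero
velocities = np.abs(xs[cur_idx] - xs[prev_idx]) / dt  # pixels per microsecond
print velocities * 1e6                                # pixels per second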
def detect(self, gaze_points, inplace=False):
    """
    Identify and label fixation intervals as 'FIX' and some others as 'NOISE'.

    Fixation identification includes the following steps:
      - First, all intersaccadic intervals with a dispersion of less than a certain spread threshold
        (@self.PREFILTERING_INTERVAL_SPREAD_THRESHOLD_DEGREES) are marked as fixations.
      - Then, a temporal window (@self.SLIDING_WINDOW_WIDTH_MICROSEC wide) is shifted across the
        remaining data, and a non-fixation onset (or offset) is marked every time the speed rises above
        (or falls below) the threshold (@self.SPEED_THRESHOLD_DEGREES_PER_SEC).
        There are two criteria for the speed calculation, 'speed' and 'spread':
          - 'speed': the speed from the start point to the end point of the window exceeds the threshold;
          - 'spread': the maximum moving speed along either x or y exceeds the threshold.
        Data with speed below the threshold are labelled as 'FIX'.
      - Finally, non-fixation episodes longer than @self.MINIMAL_SP_DURATION_MICROSEC are kept as
        'UNKNOWN', and the shorter ones are labelled as 'NOISE' (these are fairly dynamic episodes
        that nevertheless should not be SP).

    :param gaze_points: arff object with saccades detected (and intersaccadic intervals labelled)
    :param inplace: whether to replace the data inside @gaze_points or create a new structure
    :return: arff object with data labelled as 'FIX' and 'NOISE'.
             Some 'UNKNOWN' labels are kept for the next stage.
    """
    if not inplace:
        gaze_points = copy.deepcopy(gaze_points)
    # add a global index column (to keep track of where we are, even when working
    # within an intersaccadic interval)
    gaze_points = ArffHelper.add_column(gaze_points, name='global_index', dtype='INTEGER', default_value=-1)
    gaze_points['data']['global_index'] = np.arange(gaze_points['data'].shape[0])

    # I. First step of fixation removal: rough prefiltering
    #
    # Convert constants to pixels per second
    ppd = util.calculate_ppd(gaze_points)
    speed_thd = ppd * self.SPEED_THRESHOLD_DEGREES_PER_SEC
    prefiltering_spread_thd = ppd * self.PREFILTERING_INTERVAL_SPREAD_THRESHOLD_DEGREES

    # record the intersaccadic interval indices of those intervals that are not labelled
    # as FIX by the prefiltering
    unknown_interval_index = []
    unknown_interval_masks = []
    for i in xrange(max(gaze_points['data']['INTERSACC_INTERVAL_INDEX']) + 1):
        mask = gaze_points['data']['INTERSACC_INTERVAL_INDEX'] == i
        intersacc_interval = gaze_points['data'][mask]
        if len(intersacc_interval) == 0:
            continue

        dispersion = [
            max(intersacc_interval['x']) - min(intersacc_interval['x']),
            max(intersacc_interval['y']) - min(intersacc_interval['y'])
        ]
        if any(thd >= prefiltering_spread_thd for thd in dispersion):
            unknown_interval_index.append(i)  # keep unknown
            unknown_interval_masks.append(mask.copy())  # cache the indexing
        else:
            gaze_points['data']['EYE_MOVEMENT_TYPE'][mask] = 'FIX'

    # II. Second step of fixation removal: finer prefiltering
    #
    for i, interval_mask in zip(unknown_interval_index, unknown_interval_masks):
        # We record the borders of the non-FIX episodes to validate their duration. If a non-FIX
        # episode is very short, we mark it as NOISE (not enough duration for a smooth pursuit candidate).
        onset_timestamp = None
        onset_index = None

        intersacc_interval = gaze_points['data'][interval_mask]
        intersacc_interval = util.get_xy_moving_average(intersacc_interval,
                                                        self.NORMALIZATION_SLIDING_WINDOW_SIZE_SAMPLES,
                                                        inplace=False)
        # for intervals shorter than @self.INTERSACCADIC_INTERVAL_DURATION_THRESHOLD_MICROSEC:
        # cannot do further filtering, the label remains 'UNKNOWN'
        if intersacc_interval['time'][-1] - intersacc_interval['time'][0] < \
                self.INTERSACCADIC_INTERVAL_DURATION_THRESHOLD_MICROSEC:
            continue
        # for longer intervals: do further pre-filtering, i.e. label the data as 'FIX'
        # or 'NOISE', or keep 'UNKNOWN'
        else:
            # the window is shifted by 1 sample every time
            for index, item in enumerate(intersacc_interval):
                x_start = item['x']
                y_start = item['y']
                shift_window_interval = intersacc_interval[
                    (intersacc_interval['time'] >= item['time']) *
                    (intersacc_interval['time'] <= item['time'] + self.SLIDING_WINDOW_WIDTH_MICROSEC)]

                # if the distance between the current sample and the end of the interval is shorter than
                # self.SLIDING_WINDOW_WIDTH_MICROSEC (i.e. if the end of the window matches the end of the
                # intersaccadic interval), we keep the previous label if it was FIX, otherwise keep UNKNOWN
                if shift_window_interval['time'][-1] == intersacc_interval['time'][-1]:
                    if intersacc_interval['EYE_MOVEMENT_TYPE'][index - 1] == 'FIX':
                        gaze_points['data']['EYE_MOVEMENT_TYPE'][(
                            gaze_points['data']['time'] == item['time'])] = 'FIX'
                        # we do not keep track of the non-fixation interval anymore, since it will be
                        # all fixation until the end of the intersaccadic interval
                        onset_timestamp = None
                        onset_index = None
                    else:
                        # a new non-fixation interval is starting
                        onset_timestamp = item['time']
                        onset_index = item['global_index']
                # if the distance between the current sample and the end of the interval is larger than
                # the window size, continue with the process
                else:
                    # get window duration in seconds
                    period = (shift_window_interval['time'][-1] - shift_window_interval['time'][0]) * 1e-6
                    # is the fixation criterion satisfied?
                    fixation_flag = True
                    if self.SLIDING_WINDOW_CRITERION == 'speed':
                        # if the current speed is larger than the speed threshold, mark as
                        # onset (UNKNOWN, NOISE); else, mark as offset (FIX)
                        x_end = shift_window_interval['x'][-1]
                        y_end = shift_window_interval['y'][-1]
                        if math.sqrt((x_start - x_end) ** 2 +
                                     (y_start - y_end) ** 2) >= speed_thd * period:
                            # will not be a fixation
                            fixation_flag = False
                    else:  # 'spread'
                        # if either x_max - x_min or y_max - y_min is larger than
                        # speed threshold * time, mark as onset; else, mark as offset
                        x_max = max(shift_window_interval['x'])
                        x_min = min(shift_window_interval['x'])
                        y_max = max(shift_window_interval['y'])
                        y_min = min(shift_window_interval['y'])
                        if max(x_max - x_min, y_max - y_min) >= speed_thd * period:
                            # will not be a fixation
                            fixation_flag = False

                    if fixation_flag:
                        gaze_points['data']['EYE_MOVEMENT_TYPE'][item['global_index']] = 'FIX'

                    # either a fixation starts, or the whole interval ends
                    if fixation_flag or index == len(intersacc_interval) - 1:
                        # if we had a non-fixation interval going on before, check its duration
                        if onset_index is not None:
                            # an episode longer than the minimal SP duration (e.g. 50 ms)
                            # stays UNKNOWN; shorter ones become NOISE
                            if item['time'] - onset_timestamp < self.MINIMAL_SP_DURATION_MICROSEC:
                                offset_timestamp = item['time'] - 1
                                offset_index = item['global_index'] - 1
                                # if this is not the beginning of a fixation,
                                # the last item should also be labelled as NOISE
                                if not fixation_flag:
                                    offset_timestamp += 1
                                    offset_index += 1
                                gaze_points['data'][onset_index:(offset_index + 1)]['EYE_MOVEMENT_TYPE'] = \
                                    'NOISE'
                            # the episode is finished
                            onset_timestamp = None
                            onset_index = None
                    else:
                        # a new non-fixation interval has started
                        if onset_timestamp is None:
                            onset_timestamp = item['time']
                            onset_index = item['global_index']
                        # otherwise it just continues, we don't have to do anything
    # can now remove the global_index column
    gaze_points = ArffHelper.remove_column(gaze_points, 'global_index')
    return gaze_points
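# A compact illustration of the step-I prefiltering above (a sketch with made-up
# numbers; the pixels-per-degree value and the 1-degree threshold are stand-ins
# for util.calculate_ppd() and the real class constant):
import numpy as np

samples = np.array([(0, 100.0, 100.0), (20000, 101.0, 99.5), (40000, 100.5, 100.2)],
                   dtype=[('time', np.int64), ('x', np.float32), ('y', np.float32)])
ppd = 25.0                           # assumed pixels per degree for the setup
prefiltering_spread_thd = ppd * 1.0  # assumed 1-degree spread threshold

dispersion = [samples['x'].max() - samples['x'].min(),
              samples['y'].max() - samples['y'].min()]
if all(d < prefiltering_spread_thd for d in dispersion):
    print 'interval would be labelled FIX'  # spread of about 1 px is well below 25 px
else:
    print 'interval stays UNKNOWN for the finer, sliding-window filtering'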