def make_chunks(physiological_file_id, config_file, verbose):
    """
    Chunk one physiological file for visualization.

    Opens a database connection, reads the 'dataDirBasepath' configuration
    setting and, when a file path is registered for the given
    PhysiologicalFileID, delegates the work to
    Physiological.create_chunks_for_visualization.

    :param physiological_file_id: PhysiologicalFileID of the file to chunk
     :type physiological_file_id: int
    :param config_file: configuration object whose ``mysql`` attribute is
                        handed to Database as the connection information
     :type config_file: object
    :param verbose: flag for more printing if set
     :type verbose: bool
    """

    # open the database connection
    database = Database(config_file.mysql, verbose)
    database.connect()

    # base data directory from the configuration, always ending with a '/'
    base_dir = database.get_config('dataDirBasepath')
    if not base_dir.endswith('/'):
        base_dir += "/"

    physio = Physiological(database, verbose)

    # nothing to chunk when no file path is known for this file ID
    if not physio.grep_file_path_from_file_id(physiological_file_id):
        return

    print('Chunking physiological file ID ' + str(physiological_file_id))
    physio.create_chunks_for_visualization(physiological_file_id, base_dir)
def create_and_insert_archive(self, files_to_archive, archive_rel_name, eeg_file_id):
    """
    Create an archive with all electrophysiology files associated to a
    specific recording (including electrodes.tsv, channels.tsv etc...) and
    register it in the database.

    If the database already holds an archive entry for the file ID, the
    entry is checked against the filesystem instead: the script exits when
    the archive is missing on disk or when the hashes differ, and returns
    without doing anything when they agree.

    :param files_to_archive: tuple with the list of files to include in
                             the archive
     :type files_to_archive: tuple
    :param archive_rel_name: path to the archive relative to data_dir
     :type archive_rel_name: str
    :param eeg_file_id: PhysiologicalFileID
     :type eeg_file_id: int

    :raises SystemExit: with lib.exitcode.MISSING_FILES when a database
                        entry exists but no archive is on the filesystem,
                        or lib.exitcode.CORRUPTED_FILE when the stored
                        hash differs from the computed one
    """

    # sys.exit is used instead of the site-provided ``exit`` helper, which
    # is meant for interactive sessions and may be absent (e.g. python -S)
    import sys

    # load the Physiological object that will be used to insert the
    # physiological archive into the database
    physiological = Physiological(self.db, self.verbose)

    # check if archive is on the filesystem
    # NOTE(review): the digest is computed over the archive *path* string,
    # not over the file content. Kept as-is so hashes stay comparable with
    # what this function stores below -- confirm whether this is intended.
    archive_full_path = self.data_dir + archive_rel_name
    blake2 = None
    if os.path.isfile(archive_full_path):
        blake2 = blake2b(archive_full_path.encode('utf-8')).hexdigest()

    # check if archive already inserted in database and matches the one
    # on the filesystem using blake2b hash
    result = physiological.grep_archive_info_from_file_id(eeg_file_id)
    if result:
        if not blake2:
            message = '\nERROR: no archive was found on the filesystem ' + \
                      'while an entry was found in the database for ' + \
                      'PhysiologicalFileID = ' + str(eeg_file_id)
            print(message)
            sys.exit(lib.exitcode.MISSING_FILES)

        stored_hash = result['Blake2bHash']
        if stored_hash != blake2:
            # report the same 'Blake2bHash' field that was just compared;
            # str() keeps the message buildable if the stored value is NULL
            message = '\nERROR: blake2b hash of ' + archive_full_path + \
                      ' does not match the one stored in the database.' + \
                      '\nblake2b of ' + archive_full_path + ': ' + blake2 + \
                      '\nblake2b in the database: ' + str(stored_hash)
            print(message)
            sys.exit(lib.exitcode.CORRUPTED_FILE)

        # archive already registered and consistent: nothing to do
        return

    # create the archive file
    utilities.create_archive(files_to_archive, archive_rel_name, self.data_dir)

    # insert the archive file in physiological_archive
    blake2 = blake2b(archive_full_path.encode('utf-8')).hexdigest()
    archive_info = {
        'PhysiologicalFileID': eeg_file_id,
        'Blake2bHash': blake2,
        'FilePath': archive_rel_name
    }
    physiological.insert_archive_file(archive_info)
def register_raw_data(self):
    """
    Registers raw EEG data and related files into the following tables:
        - physiological_file
        - physiological_parameter_file
        - physiological_electrode
        - physiological_channel
        - physiological_task_event

    After the insertions, all registered files are bundled into a .tgz
    archive and data chunks are created for visualization.

    Returns without registering anything when no EEG file was found.
    """

    # insert EEG file
    inserted_eeg = self.fetch_and_insert_eeg_file()
    if not inserted_eeg:
        # fetch_and_insert_eeg_file returns None when no EEG file matched;
        # without this guard the subscripts below would raise a TypeError
        print("WARNING: no EEG file found, nothing to register")
        return

    eeg_file_id = inserted_eeg['file_id']
    eeg_file_path = inserted_eeg['eeg_path']

    # insert related electrode, channel and event information
    electrode_file_path = self.fetch_and_insert_electrode_file(eeg_file_id)
    channel_file_path = self.fetch_and_insert_channel_file(eeg_file_id)
    event_file_path = self.fetch_and_insert_event_file(eeg_file_id)

    # grep the path to the fdt file if present in
    # physiological_parameter_file for that PhysiologicalFileID
    physiological = Physiological(self.db, self.verbose)
    results = physiological.grep_parameter_value_from_file_id(
        eeg_file_id, 'fdt_file')
    fdt_file_path = results['Value'] if results else None

    # archive all files in a tar ball for downloading all files at once;
    # the EEG file always comes first, companion files only when present
    optional_paths = (
        electrode_file_path,
        fdt_file_path,
        event_file_path,
        channel_file_path,
    )
    files_to_archive = (self.data_dir + eeg_file_path,) + tuple(
        self.data_dir + rel_path for rel_path in optional_paths if rel_path
    )
    archive_rel_name = os.path.splitext(eeg_file_path)[0] + ".tgz"
    self.create_and_insert_archive(files_to_archive, archive_rel_name,
                                   eeg_file_id)

    # create data chunks for React visualization in
    # data_dir/bids_import/bids_dataset_name_BIDSVersion_chunks directory
    physiological.create_chunks_for_visualization(eeg_file_id, self.data_dir)
def fetch_and_insert_event_file(self, physiological_file_id, derivatives=None):
    """
    Look up the events.tsv file among self.events_files and, unless events
    are already registered for the physiological file, copy it to the
    LORIS BIDS import directory and hand its content to
    Physiological.insert_event_file, linking it to the
    PhysiologicalFileID already registered.

    :param physiological_file_id: PhysiologicalFileID of the associated
                                  physiological file already inserted into
                                  the physiological_file table
     :type physiological_file_id: int
    :param derivatives: dictionary with derivative folder information if
                        the event file to insert is a derivative file.
                        Set by default to None when inserting raw file.
     :type derivatives: dict

    :return: path of the event file (the one already registered, or the
             freshly copied one), or None when no events file was found
     :rtype: str
    """

    # object used for every database read/write on physiological data
    physio = Physiological(self.db, self.verbose)

    # derivative files are searched with a pattern built from the
    # derivative name and are copied under the derivatives path
    pattern = None
    target_dir = None
    if derivatives:
        pattern = derivatives['derivative_name'] + "/sub-"
        target_dir = self.get_derivatives_path(derivatives)

    events_tsv = BidsReader.grep_file(
        files_list=self.events_files,
        match_pattern='events.tsv',
        derivative_pattern=pattern)

    if not events_tsv:
        print("WARNING: no events file associated with "
              "physiological file ID " + str(physiological_file_id))
        return None

    existing = physio.grep_event_from_physiological_file_id(
        physiological_file_id)
    registered_path = existing[0]['FilePath'] if existing else None
    parsed_events = utilities.read_tsv_file(events_tsv)

    # events already registered for this file: report the stored path
    if existing:
        return registered_path

    # copy the event file to the LORIS BIDS import directory
    copied_path = self.copy_file_to_loris_bids_dir(events_tsv, target_dir)

    # blake2b digest recorded alongside the task events
    digest = blake2b(events_tsv.encode('utf-8')).hexdigest()

    # insert event data in the database
    physio.insert_event_file(parsed_events, copied_path,
                             physiological_file_id, digest)

    return copied_path
def fetch_and_insert_eeg_file(self, derivatives=None):
    """
    Gather EEG file information to insert into physiological_file and
    physiological_parameter_file. Once all the information has been
    gathered, it will call Physiological.insert_physiological_file that
    will perform the insertion into physiological_file and
    physiological_parameter_file.

    A file whose hash is already known to the database is not inserted
    again; its registered ID and path are returned instead.

    :param derivatives: dictionary with derivative folder information if
                        the EEG file to insert is a derivative file.
                        Set by default to None when inserting raw file.
     :type derivatives: dict

    :return: dictionary with registered file ID ('file_id') and path to
             its file ('eeg_path'), or None when no EEG file was found
     :rtype: dict
    """

    # load the Physiological object that will be used to insert the
    # physiological data into the database
    physiological = Physiological(self.db, self.verbose)

    # check if inserting derivatives to use the derivative_pattern to
    # grep for the eeg file
    derivative_pattern = None
    derivative_path = None
    files_list = self.eeg_files  # by default, raw data is eeg_files
    if derivatives:
        # TODO grep the source file as well as the input file ID???
        derivative_pattern = derivatives['derivative_name'] + "/sub-"
        files_list = self.derivative_eeg_files
        derivative_path = self.get_derivatives_path(derivatives)

    # grep the raw files from eeg_files list
    eeg_file = BidsReader.grep_file(
        files_list=files_list,
        match_pattern=".(set$|edf$|vhdr$|vmrk$|eeg$|bdf$)",
        derivative_pattern=derivative_pattern)
    json_file = BidsReader.grep_file(
        files_list=files_list,
        match_pattern='.json$',
        derivative_pattern=derivative_pattern)
    fdt_file = BidsReader.grep_file(
        files_list=files_list,
        match_pattern='.fdt$',
        derivative_pattern=derivative_pattern)

    # return if no eeg_file was found
    if not eeg_file:
        return None

    # read the json file if it exists
    eeg_file_data = {}
    if json_file:
        with open(json_file) as data_file:
            eeg_file_data = json.load(data_file)
        # copy the JSON file to the LORIS BIDS import directory
        json_path = self.copy_file_to_loris_bids_dir(
            json_file, derivative_path)
        eeg_file_data['json_file'] = json_path
        # NOTE(review): here and below the digest is taken over the path
        # string, not the file content -- confirm this is intended
        json_blake2 = blake2b(json_file.encode('utf-8')).hexdigest()
        eeg_file_data['physiological_json_file_blake2b_hash'] = json_blake2

    # greps the file type from the ImagingFileTypes table
    file_type = physiological.determine_file_type(eeg_file)

    # grep the output type from the physiological_output_type table
    output_type = 'derivatives' if derivatives else 'raw'
    output_type_id = self.db.grep_id_from_lookup_table(
        id_field_name='PhysiologicalOutputTypeID',
        table_name='physiological_output_type',
        where_field_name='OutputTypeName',
        where_value=output_type,
        insert_if_not_found=False)

    # get the acquisition date of the EEG file or the age at the time of
    # the EEG recording
    eeg_acq_time = None
    if self.scans_file:
        scan_info = ScansTSV(self.scans_file, eeg_file, self.verbose)
        eeg_acq_time = scan_info.get_acquisition_time()
        eeg_file_data['age_at_scan'] = scan_info.get_age_at_scan()
        # copy the scans.tsv file to the LORIS BIDS import directory
        scans_path = scan_info.copy_scans_tsv_file_to_loris_bids_dir(
            self.bids_sub_id, self.loris_bids_root_dir, self.data_dir)
        eeg_file_data['scans_tsv_file'] = scans_path
        scans_blake2 = blake2b(self.scans_file.encode('utf-8')).hexdigest()
        # NOTE(review): key spelling ('bake2hash') is kept unchanged;
        # presumably existing consumers rely on it -- verify before renaming
        eeg_file_data[
            'physiological_scans_tsv_file_bake2hash'] = scans_blake2

    # if file type is set and fdt file exists, append fdt path to the
    # eeg_file_data dictionary
    if file_type == 'set' and fdt_file:
        # copy the fdt file to the LORIS BIDS import directory
        fdt_path = self.copy_file_to_loris_bids_dir(
            fdt_file, derivative_path)
        eeg_file_data['fdt_file'] = fdt_path
        fdt_blake2 = blake2b(fdt_file.encode('utf-8')).hexdigest()
        eeg_file_data['physiological_fdt_file_blake2b_hash'] = fdt_blake2

    # append the blake2b to the eeg_file_data dictionary
    blake2 = blake2b(eeg_file.encode('utf-8')).hexdigest()
    eeg_file_data['physiological_file_blake2b_hash'] = blake2

    # check that the file using blake2b is not already inserted before
    # inserting it
    result = physiological.grep_file_id_from_hash(blake2)
    physio_file_id = result['PhysiologicalFileID'] if result else None
    eeg_path = result['FilePath'] if result else None
    if not physio_file_id:
        # grep the modality ID from physiological_modality table
        modality_id = self.db.grep_id_from_lookup_table(
            id_field_name='PhysiologicalModalityID',
            table_name='physiological_modality',
            where_field_name='PhysiologicalModality',
            where_value=self.bids_modality,
            insert_if_not_found=False)

        # copy the eeg_file to the LORIS BIDS import directory
        eeg_path = self.copy_file_to_loris_bids_dir(
            eeg_file, derivative_path)

        # insert the file along with its information into
        # physiological_file and physiological_parameter_file tables
        eeg_file_info = {
            'FileType': file_type,
            'FilePath': eeg_path,
            'SessionID': self.session_id,
            'AcquisitionTime': eeg_acq_time,
            'InsertedByUser': getpass.getuser(),
            'PhysiologicalOutputTypeID': output_type_id,
            'PhysiologicalModalityID': modality_id
        }
        physio_file_id = physiological.insert_physiological_file(
            eeg_file_info, eeg_file_data)

    # if the EEG file was a set file, then update the filename for the .set
    # and .fdt files in the .set file so it can find the proper file for
    # visualization and analyses
    if file_type == 'set':
        set_full_path = self.data_dir + eeg_path
        # 'fdt_file' is only present when a companion .fdt file was found,
        # so look it up with .get() instead of risking a KeyError
        fdt_full_path = eeg_file_data.get('fdt_file')
        if fdt_full_path:
            fdt_full_path = self.data_dir + fdt_full_path
        utilities.update_set_file_path_info(set_full_path, fdt_full_path)

    return {'file_id': physio_file_id, 'eeg_path': eeg_path}