def get_annotations(self):
    """
    Returns
    -------
    Annotations in the form of pandas DataFrame(s)
    """
    if self.session is None or self.session.session_md is None:
        self.session = MefSession(self._path, self._password)

    dfs_out = {}
    session_md = self.session.session_md

    # Get session level records
    if 'records_info' in session_md.keys():
        session_records = session_md['records_info']['records']
        dfs_out.update(self._process_mef_records(session_records))

    # Get channel level records
    for _, channel_d in session_md['time_series_channels'].items():
        if 'records_info' in channel_d.keys():
            ch_rec_list = channel_d['records_info']['records']
        else:
            ch_rec_list = []

        # Get segment level records
        for segment_d in channel_d['segments'].values():
            if 'records_info' in segment_d.keys():
                ch_rec_list += segment_d['records_info']['records']

        dfs_out.update(self._process_mef_records(ch_rec_list))

    return dfs_out
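# A minimal usage sketch for get_annotations(), assuming it is a method of
# the mefdHandler data source defined later in this file; the session path
# and password are hypothetical placeholders, and the private _path /
# _password attributes are set directly only for illustration.
def _example_get_annotations():
    handler = mefdHandler()
    handler._path = '/data/example_session.mefd'
    handler._password = 'example_password'
    # dict of pandas DataFrames, keyed by record type
    annotation_dfs = handler.get_annotations()
    for record_type, df in annotation_dfs.items():
        print(record_type, len(df))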
def load_data_epochs(data_path, channels, onsets,
                     trial_epoch=(-1, 3),
                     baseline_norm=None, baseline_epoch=(-1, -0.1)):
    """
    Load and epoch the data into a matrix based on channels, stimulus onsets
    and the epoch range (relative to the onsets)

    Args:
        data_path (str): Path to the data file or folder
        channels (list or tuple): The channels that should be read from the
            data; the output will be sorted according to this input argument.
        onsets (list or tuple): The onsets of the stimuli (e.g. trials)
            around which to epoch the data
        trial_epoch (tuple): The time-span that will be considered as the
            signal belonging to a single trial. Expressed as a tuple with the
            start- and end-point in seconds relative to stimulation onset of
            the trial (e.g. the standard tuple of '-1, 3' will extract the
            signal in the period from 1s before stimulation onset to 3s
            after stimulation onset).
        baseline_norm (None or str): Baseline normalization setting
            [None, 'Mean' or 'Median']. If other than None, normalizes each
            trial epoch by subtracting the mean or median of part of the
            trial (the epoch of the trial indicated in baseline_epoch)
        baseline_epoch (tuple): The time-span on which the baseline is
            calculated, expressed as a tuple with the start- and end-point
            in seconds relative to stimulation onset (e.g. the standard
            tuple of '-1, -.1' will use the period from 1s before
            stimulation onset to 100ms before stimulation onset to calculate
            the baseline on); this argument is only used when baseline_norm
            is set to mean or median

    Returns:
        sampling_rate (int or double): the sampling rate at which the data
            was acquired
        data (ndarray): A three-dimensional array with data epochs per
            channel (format: channel x trials/epochs x time); or None when
            an error occurs

    Note: this function's units are in time relative to stimulation
          (e.g. trial) onset because the sample rate will only be known
          after we read the data
    """

    # initialize the return variables to a default
    sampling_rate = None
    data = None

    # TODO: handle different units in data format

    #
    # check input
    #

    # data-set format
    data_extension = data_path[data_path.rindex("."):]
    if data_extension == '.edf':
        data_format = 0
    elif data_extension in ('.vhdr', '.vmrk', '.eeg'):
        data_format = 1
    elif data_extension == '.mefd':
        data_format = 2
    else:
        logging.error('Unknown data format (' + data_extension + ')')
        return None, None

    #
    if trial_epoch[1] < trial_epoch[0]:
        logging.error('Invalid \'trial_epoch\' parameter, the given '
                      'end-point (at ' + str(trial_epoch[1]) + ') lies '
                      'before the start-point (at ' + str(trial_epoch[0]) + ')')
        return None, None

    # baseline normalization
    baseline_method = 0
    if baseline_norm is not None and len(baseline_norm) > 0:
        if baseline_norm.lower() == 'mean' or baseline_norm.lower() == 'average':
            baseline_method = 1
        elif baseline_norm.lower() == 'median':
            baseline_method = 2
        elif baseline_norm.lower() == 'none':
            baseline_method = 0
        else:
            logging.error('Unknown normalization argument (' + baseline_norm + ')')
            return None, None

        #
        if baseline_epoch[1] < baseline_epoch[0]:
            logging.error('Invalid \'baseline_epoch\' parameter, the given '
                          'end-point (at ' + str(baseline_epoch[1]) + ') '
                          'lies before the start-point (at ' +
                          str(baseline_epoch[0]) + ')')
            return None, None
        if data_format == 2:
            if baseline_epoch[0] < trial_epoch[0]:
                logging.error('Invalid \'baseline_epoch\' parameter, the '
                              'given baseline start-point (at ' +
                              str(baseline_epoch[0]) + ') lies before the '
                              'trial start-point (at ' +
                              str(trial_epoch[0]) + ')')
                return None, None
            if baseline_epoch[1] > trial_epoch[1]:
                logging.error('Invalid \'baseline_epoch\' parameter, the '
                              'given baseline end-point (at ' +
                              str(baseline_epoch[1]) + ') lies after the '
                              'trial end-point (at ' +
                              str(trial_epoch[1]) + ')')
                return None, None

    #
    # read and process the data
    #

    if data_format == 0 or data_format == 1:
        # EDF or BrainVision format, use MNE to read

        # Alternative for EDF (use pyedflib), low memory usage solution
        # since it has the ability to read per channel
        #from pyedflib import EdfReader
        #f = EdfReader(data_path)
        #n = f.signals_in_file
        #signal_labels = f.getSignalLabels()
        #sampling_rate = f.getSampleFrequencies()[0]
        #size_time_s = int(ceil(abs(trial_epoch[1] - trial_epoch[0]) * sampling_rate))
        #data = np.empty((len(channels_include), len(onsets), size_time_s))
        #data.fill(np.nan)
        #for iChannel in range(len(channels)):
        #    channel_index = signal_labels.index(channels[iChannel])
        #    signal = f.readSignal(channel_index)
        #    for iTrial in range(len(onsets)):
        #        sample_start = int(round(onsets[iTrial] * sampling_rate))
        #        data[iChannel, iTrial, :] = signal[sample_start:sample_start + size_time_s]

        # read the data
        try:
            if data_format == 0:
                mne_raw = read_raw_edf(data_path, eog=[], misc=[],
                                       stim_channel=[], preload=True,
                                       verbose=None)
                #mne_raw = read_raw_edf(data_path, eog=None, misc=None, stim_channel=[], exclude=channels_non_ieeg, preload=True, verbose=None)
            if data_format == 1:
                mne_raw = read_raw_brainvision(
                    data_path[:data_path.rindex(".")] + '.vhdr', preload=True)
        except Exception as e:
            logging.error('MNE could not read data, message: ' + str(e))
            return None, None

        # retrieve the sample-rate
        sampling_rate = mne_raw.info['sfreq']

        # calculate the size of the time dimension (in samples)
        size_time_s = int(ceil(abs(trial_epoch[1] - trial_epoch[0]) * sampling_rate))

        # initialize a data buffer (channel x trials/epochs x time)
        data = allocate_array((len(channels), len(onsets), size_time_s))
        if data is None:
            return None, None

        # loop through the included channels
        for iChannel in range(len(channels)):

            # (try to) retrieve the index of the channel
            try:
                channel_index = mne_raw.info['ch_names'].index(channels[iChannel])
            except ValueError:
                logging.error('Could not find channel \'' +
                              channels[iChannel] + '\' in the dataset')
                return None, None

            # loop through the trials
            for iTrial in range(len(onsets)):

                #
                trial_sample_start = int(round((onsets[iTrial] + trial_epoch[0]) * sampling_rate))
                if trial_sample_start < 0 or trial_sample_start + size_time_s >= len(mne_raw):
                    logging.error('Cannot extract the trial with onset ' +
                                  str(onsets[iTrial]) + ', the range for '
                                  'extraction lies outside of the data')
                    return None, None

                #
                if baseline_method > 0:
                    baseline_start_sample = int(round((onsets[iTrial] + baseline_epoch[0]) * sampling_rate))
                    baseline_end_sample = int(round((onsets[iTrial] + baseline_epoch[1]) * sampling_rate))
                    if baseline_start_sample < 0 or baseline_end_sample >= len(mne_raw):
                        logging.error('Cannot extract the baseline for the '
                                      'trial with onset ' +
                                      str(onsets[iTrial]) + ', the range '
                                      'for the baseline lies outside of '
                                      'the data')
                        return None, None

                # extract the trial data and perform baseline normalization
                # on the trial if needed
                if baseline_method == 0:
                    data[iChannel, iTrial, :] = mne_raw[channel_index, trial_sample_start:trial_sample_start + size_time_s][0]
                elif baseline_method == 1:
                    baseline_mean = np.nanmean(mne_raw[channel_index, baseline_start_sample:baseline_end_sample][0])
                    data[iChannel, iTrial, :] = mne_raw[channel_index, trial_sample_start:trial_sample_start + size_time_s][0] - baseline_mean
                elif baseline_method == 2:
                    baseline_median = np.nanmedian(mne_raw[channel_index, baseline_start_sample:baseline_end_sample][0])
                    data[iChannel, iTrial, :] = mne_raw[channel_index, trial_sample_start:trial_sample_start + size_time_s][0] - baseline_median

        # TODO: clear memory in MNE, close() doesn't seem to work,
        #       neither does removing the channels; issue MNE?
        mne_raw.close()
        del mne_raw

        # MNE always returns data in volt, convert to micro-volt
        data = data * 1000000

    elif data_format == 2:
        # MEF3 format

        # read the session metadata
        try:
            mef = MefSession(data_path, '', read_metadata=True)
        except Exception:
            logging.error('PyMef could not read data, either a password is '
                          'needed or the data is corrupt')
            return None, None

        # retrieve the sample-rate and total number of samples in the data-set
        sampling_rate = mef.session_md['time_series_metadata']['section_2']['sampling_frequency'].item(0)
        num_samples = mef.session_md['time_series_metadata']['section_2']['number_of_samples'].item(0)

        # calculate the size of the time dimension (in samples)
        size_time_s = int(ceil(abs(trial_epoch[1] - trial_epoch[0]) * sampling_rate))

        # initialize a data buffer (channel x trials/epochs x time)
        data = allocate_array((len(channels), len(onsets), size_time_s))
        if data is None:
            return None, None

        # create a progress bar
        print_progressbar(0, len(onsets), prefix='Progress:',
                          suffix='Complete', length=50)

        # loop through the trials
        for iTrial in range(len(onsets)):

            #
            trial_sample_start = int(round((onsets[iTrial] + trial_epoch[0]) * sampling_rate))
            if trial_sample_start < 0 or trial_sample_start + size_time_s >= num_samples:
                logging.error('Cannot extract the trial with onset ' +
                              str(onsets[iTrial]) + ', the range for '
                              'extraction lies outside of the data')
                return None, None

            #
            if baseline_method > 0:
                baseline_start_sample = int(round((onsets[iTrial] + baseline_epoch[0]) * sampling_rate)) - trial_sample_start
                baseline_end_sample = int(round((onsets[iTrial] + baseline_epoch[1]) * sampling_rate)) - trial_sample_start
                if baseline_start_sample < 0 or baseline_end_sample >= size_time_s:
                    logging.error('Cannot extract the baseline, the range '
                                  'for the baseline lies outside of the '
                                  'trial epoch')
                    return None, None

            # load the trial data
            try:
                trial_data = mef.read_ts_channels_sample(
                    channels,
                    [trial_sample_start, trial_sample_start + size_time_s])
                if trial_data is None or (len(trial_data) > 0 and trial_data[0] is None):
                    return None, None
            except Exception:
                logging.error('PyMef could not read data, either a password '
                              'is needed or the data is corrupt')
                return None, None

            # loop through the channels
            for iChannel in range(len(channels)):
                if baseline_method == 0:
                    data[iChannel, iTrial, :] = trial_data[iChannel]
                elif baseline_method == 1:
                    baseline_mean = np.nanmean(trial_data[iChannel][baseline_start_sample:baseline_end_sample])
                    data[iChannel, iTrial, :] = trial_data[iChannel] - baseline_mean
                elif baseline_method == 2:
                    baseline_median = np.nanmedian(trial_data[iChannel][baseline_start_sample:baseline_end_sample])
                    data[iChannel, iTrial, :] = trial_data[iChannel] - baseline_median

            del trial_data

            # update progress bar
            print_progressbar(iTrial + 1, len(onsets), prefix='Progress:',
                              suffix='Complete', length=50)

    #
    return sampling_rate, data
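# A minimal usage sketch for load_data_epochs(); the file path, channel
# names and onset times are hypothetical placeholders.
def _example_load_data_epochs():
    onsets = [10.0, 25.5, 41.2]  # stimulation onsets in seconds
    sampling_rate, data = load_data_epochs('/data/subject01.edf',
                                           ['CH01', 'CH02'], onsets,
                                           trial_epoch=(-1, 3),
                                           baseline_norm='Median',
                                           baseline_epoch=(-1, -0.1))
    if data is not None:
        # data has the shape channel x trials/epochs x time
        print(sampling_rate, data.shape)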
class TestStringMethods(unittest.TestCase):

    @classmethod
    def setUpClass(self):

        self.temp_dir = tempfile.TemporaryDirectory(suffix='.mefd')
        self.mef_session_path = self.temp_dir.name

        # Some presets
        self.secs_to_write = 10
        self.samps_per_mef_block = 5000
        self.sampling_frequency = 5000
        self.secs_to_append = 5
        self.discont_length = 2
        self.secs_to_seg2 = 5

        self.pwd_1 = 'chair'
        self.pwd_2 = 'table'

        self.start_time = 946684800000000
        self.end_time = int(self.start_time + 1e6*self.secs_to_write)
        self.record_time_1 = int(self.start_time + 1e6)
        self.record_time_2 = int(self.start_time + 2*1e6)
        self.rec_offset = int(self.start_time - 1e6)

        # Create paths for channels and segments
        self.ts_channel = 'ts_channel'
        self.ts_channel_path = self.mef_session_path+'/ts_channel.timd'
        self.ts_seg1_path = self.ts_channel_path+'/ts_channel-000000.segd'
        self.ts_seg2_path = self.ts_channel_path+'/ts_channel-000001.segd'

        self.vid_channel = 'vid_channel'
        self.vid_channel_path = self.mef_session_path+'/vid_channel.vidd'
        self.vid_seg1_path = self.vid_channel_path+'/vid_channel-000000.segd'

        # Prepare dummy records
        self.record_list = []

        # Create Note
        note_dict = {'type': 'Note',
                     'time': self.record_time_1,
                     'text': 'Note_test'}

        # Create SyLg
        sylg_dict = {'type': 'SyLg',
                     'time': self.record_time_1,
                     'text': 'SyLg_test'}

        # Create EDFA
        edfa_dict = {'type': 'EDFA',
                     'time': self.record_time_1,
                     'duration': 1000000,
                     'text': 'EDFA_test'}

        # Create LNTP
        lntp_dict = {'type': 'LNTP',
                     'time': self.record_time_1,
                     'length': 5,
                     'template': np.array([1, 2, 3, 4, 5])}

        # Create Seiz
        seiz_chans = []
        seiz_chan_dict_1 = {'name': 'msel',
                            'onset': self.record_time_1,
                            'offset': self.record_time_2}
        seiz_chans.append(seiz_chan_dict_1)
        seiz_time = min([x['onset'] for x in seiz_chans])
        earliest_onset = min([x['onset'] for x in seiz_chans])
        latest_offset = max([x['offset'] for x in seiz_chans])
        seiz_dict = {'type': 'Seiz',
                     'time': seiz_time,
                     'earliest_onset': earliest_onset,
                     'latest_offset': latest_offset,
                     'duration': latest_offset - earliest_onset,
                     'number_of_channels': len(seiz_chans),
                     'onset_code': 2,
                     'marker_name_1': 'beer_tap',
                     'marker_name_2': 'wine_tap',
                     'annotation': 'test_seizure',
                     'channels': seiz_chans}

        csti_dict = {'type': 'CSti',
                     'time': self.record_time_1,
                     'task_type': 'beerdrink',
                     'stimulus_duration': 1000000,
                     'stimulus_type': 'pilsner',
                     'patient_response': 'hmmm'}

        esti_dict = {'type': 'ESti',
                     'time': self.record_time_1,
                     'amplitude': 1.5,
                     'frequency': 250.5,
                     'pulse_width': 100,
                     'ampunit_code': 1,
                     'mode_code': 2,
                     'waveform': 'nice',
                     'anode': 'positive',
                     'catode': 'negative'}

        curs_dict = {'type': 'Curs',
                     'time': self.record_time_1,
                     'id_number': 5342,
                     'trace_timestamp': self.record_time_1+1000,
                     'latency': 1000000,
                     'value': 35.4,
                     'name': 'tequila'}

        epoc_dict = {'type': 'Epoc',
                     'time': self.record_time_1,
                     'id_number': 8967,
                     'timestamp': self.record_time_1+2000,
                     'end_timestamp': self.record_time_1+10000,
                     'duration': 8000,
                     'epoch_type': 'straight',
                     'text': 'vodka'}

        self.record_list.append(note_dict)
        self.record_list.append(sylg_dict)
        self.record_list.append(edfa_dict)
        self.record_list.append(lntp_dict)
        self.record_list.append(seiz_dict)
        self.record_list.append(csti_dict)
        self.record_list.append(esti_dict)
        self.record_list.append(curs_dict)
        self.record_list.append(epoc_dict)

        # Prepare dummy time series metadata
        self.section3_dict = {'recording_time_offset': self.rec_offset,
                              'DST_start_time': 0,
                              'DST_end_time': 0,
                              'GMT_offset': 3600,
                              'subject_name_1': b'Olaf',
                              'subject_name_2': b'Mefson',
                              'subject_ID': b'2017',
                              'recording_location': b'pub'}

        self.section2_ts_dict = {'channel_description': b'Test_channel',
                                 'session_description': b'Test_session',
                                 'recording_duration': 1,  # TODO: test 0 / None
                                 'reference_description': b'wine',
                                 'acquisition_channel_number': 5,
                                 'sampling_frequency': self.sampling_frequency,
                                 'notch_filter_frequency_setting': 50.0,
                                 'low_frequency_filter_setting': 1.0,
                                 'high_frequency_filter_setting': 10.0,
                                 'AC_line_frequency': 70,
                                 'units_conversion_factor': 1.5,
                                 'units_description': b'uV',
                                 'maximum_native_sample_value': 0.0,
                                 'minimum_native_sample_value': 0.0,
                                 'start_sample': 0,  # Different for segments
                                 'number_of_blocks': 0,
                                 'maximum_block_bytes': 0,
                                 'maximum_block_samples': 0,
                                 'maximum_difference_bytes': 0,
                                 'block_interval': 0,
                                 'number_of_discontinuities': 1,
                                 'maximum_contiguous_blocks': 0,
                                 'maximum_contiguous_block_bytes': 0,
                                 'maximum_contiguous_samples': 0,
                                 'number_of_samples': 0}

        # Second segment
        self.section2_ts_dict_seg2 = self.section2_ts_dict.copy()
        seg2_start_sample = self.sampling_frequency * (self.secs_to_write
                                                       + self.secs_to_append)
        self.section2_ts_dict_seg2['start_sample'] = seg2_start_sample

        # Prepare dummy time series data
        N = self.sampling_frequency * self.secs_to_write
        self.raw_data = np.random.randint(-200, 200, N, dtype='int32')

        # Prepare dummy data for appending to test the append function
        N = self.sampling_frequency * self.secs_to_append
        self.raw_data_to_append = np.random.randint(-200, 200, N,
                                                    dtype='int32')
        self.raw_data_seg_1 = np.concatenate([self.raw_data,
                                              self.raw_data_to_append])

        # Second segment data
        N = self.sampling_frequency * self.secs_to_seg2
        self.raw_data_seg_2 = np.random.randint(-200, 200, N, dtype='int32')

        self.raw_data_all = np.concatenate([self.raw_data_seg_1,
                                            self.raw_data_seg_2])

        # Prepare dummy video metadata and indices
        self.section2_v_dict = {'channel_description': b'Test_channel',
                                'session_description': b'Test_session',
                                'recording_duration': 1,
                                'horizontal_resolution': 10,
                                'vertical_resolution': 20,
                                'frame_rate': 60.0,
                                'number_of_clips': 1,
                                'maximum_clip_bytes': 5000,
                                'video_format': b'mpeg',
                                'video_file_CRC': 111111}

        self.v_index_entry = {'start_time': self.start_time,
                              'end_time': self.end_time,
                              'start_frame': 0,
                              'end_frame': 300,
                              'file_offset': 0,
                              'clip_bytes': 5000}
        self.v_index_entries_list = [self.v_index_entry]

        v_index_dtype = pymef3_file.create_vi_dtype()
        self.v_index_entries = np.zeros(len(self.v_index_entries_list),
                                        dtype=v_index_dtype)
        for i, vi in enumerate(self.v_index_entries_list):
            for key, value in vi.items():
                self.v_index_entries[key][i] = value

        # Create mef session object
        ms = MefSession(self.mef_session_path, self.pwd_2,
                        read_metadata=False)

        # Write dummy records
        ms.write_mef_records(self.pwd_1, self.pwd_2, self.start_time,
                             self.end_time, self.rec_offset,
                             self.record_list, channel=self.ts_channel,
                             segment_n=0)
        # print("Records written at segment level")

        ms.write_mef_records(self.pwd_1, self.pwd_2, self.start_time,
                             self.end_time, self.rec_offset,
                             self.record_list, channel=self.ts_channel)
        # print("Records written at channel level")

        ms.write_mef_records(self.pwd_1, self.pwd_2, self.start_time,
                             self.end_time, self.rec_offset,
                             self.record_list)
        # print("Records written at session level")

        # Write dummy time series metadata
        # Note: the rest of the tmd2 fields is subject to discussion
        ms.write_mef_ts_segment_metadata(self.ts_channel, 0,
                                         self.pwd_1, self.pwd_2,
                                         self.start_time, self.end_time,
                                         self.section2_ts_dict,
                                         self.section3_dict)

        # Write second segment
        seg2_start = int(self.end_time + (1e6*self.secs_to_append)
                         + int(1e6*self.discont_length))
        seg2_stop = seg2_start + int(self.secs_to_seg2 * 1e6)
        self.section2_ts_dict_seg2 = self.section2_ts_dict.copy()
        start_samp = self.sampling_frequency * (self.secs_to_write
                                                + self.secs_to_append)
        self.section2_ts_dict_seg2['start_sample'] = start_samp
        ms.write_mef_ts_segment_metadata(self.ts_channel, 1,
                                         self.pwd_1, self.pwd_2,
                                         seg2_start, seg2_stop,
                                         self.section2_ts_dict_seg2,
                                         self.section3_dict)
        # print("Time series metadata written")

        # Write dummy time series data
        # Write first segment
        ms.write_mef_ts_segment_data(self.ts_channel, 0,
                                     self.pwd_1, self.pwd_2,
                                     self.samps_per_mef_block,
                                     self.raw_data)

        # Append time series data and modify files accordingly,
        # but update metadata first
        append_start = self.end_time
        append_stop = int(append_start + (self.secs_to_append * 1e6))
        ms.append_mef_ts_segment_data(self.ts_channel, 0,
                                      self.pwd_1, self.pwd_2,
                                      append_start, append_stop,
                                      self.samps_per_mef_block,
                                      self.raw_data_to_append)

        # Write second segment
        ms.write_mef_ts_segment_data(self.ts_channel, 1,
                                     self.pwd_1, self.pwd_2,
                                     self.samps_per_mef_block,
                                     self.raw_data_seg_2)
        # print("Time series data and indices written")

        # Write dummy video metadata and indices
        ms.write_mef_v_segment_metadata(self.vid_channel, 0,
                                        self.pwd_1, self.pwd_2,
                                        self.start_time, self.end_time,
                                        self.section2_v_dict,
                                        self.section3_dict)
        # print("Video metadata written")

        ms.write_mef_v_segment_indices(self.vid_channel, 0,
                                       self.pwd_1, self.pwd_2,
                                       self.start_time, self.end_time,
                                       self.v_index_entries)
        ms.close()
        # print("Video indices written")

        # Read back session metadata (avoids reading metadata in each test)
        self.ms = MefSession(self.mef_session_path, self.pwd_2)
        self.smd = self.ms.session_md

    # ----- Read metadata tests -----
    def test_read_ts_segment_metadata(self):
        pymef3_file.read_mef_segment_metadata(self.ts_seg1_path, self.pwd_2)

    def test_read_ts_channel_metadata(self):
        pymef3_file.read_mef_channel_metadata(self.ts_channel_path,
                                              self.pwd_2)

    def test_read_vid_segment_metadata(self):
        pymef3_file.read_mef_segment_metadata(self.vid_seg1_path, self.pwd_2)

    def test_read_vid_channel_metadata(self):
        pymef3_file.read_mef_channel_metadata(self.vid_channel_path,
                                              self.pwd_2)

    def test_read_session_metadata(self):
        pymef3_file.read_mef_session_metadata(self.mef_session_path,
                                              self.pwd_2)

    # ----- Mef write / read comparison -----
    def test_record_reading(self):
        read_records = self.ms.read_records('ts_channel', 0)
        self.assertEqual(len(self.record_list), len(read_records))
        for rec_id in range(len(self.record_list)):
            write_record = self.record_list[rec_id]
            read_record = read_records[rec_id]
            # print('Record type: ---'+write_record['type_string']+'---')

            # Record header
            self.assertEqual(write_record['time'], read_record['time'])
            self.assertEqual(write_record['type'], read_record['type'])

            # Record body
            if write_record['type'] == 'EDFA':
                self.assertEqual(write_record['duration'],
                                 read_record['duration'])
                self.assertEqual(write_record['text'], read_record['text'])

            if write_record['type'] == 'Note':
                self.assertEqual(write_record['text'], read_record['text'])

            if write_record['type'] == 'SyLg':
                self.assertEqual(write_record['text'], read_record['text'])

            if write_record['type'] == 'Seiz':
                self.assertEqual(write_record['earliest_onset'],
                                 read_record['earliest_onset'])
                self.assertEqual(write_record['latest_offset'],
                                 read_record['latest_offset'])
                self.assertEqual(write_record['duration'],
                                 read_record['duration'])
                self.assertEqual(write_record['number_of_channels'],
                                 read_record['number_of_channels'])
                self.assertEqual(write_record['onset_code'],
                                 read_record['onset_code'])
                self.assertEqual(write_record['marker_name_1'],
                                 read_record['marker_name_1'])
                self.assertEqual(write_record['marker_name_2'],
                                 read_record['marker_name_2'])
                self.assertEqual(write_record['annotation'],
                                 read_record['annotation'])
                # Check the channel entries
                for ci, write_channel in enumerate(write_record['channels']):
                    read_channel = read_record['channels'][ci]
                    self.assertEqual(write_channel['name'],
                                     read_channel['name'])
                    self.assertEqual(write_channel['onset'],
                                     read_channel['onset'])
                    self.assertEqual(write_channel['offset'],
                                     read_channel['offset'])

            if write_record['type'] == 'CSti':
                self.assertEqual(write_record['task_type'],
                                 read_record['task_type'])
                self.assertEqual(write_record['stimulus_duration'],
                                 read_record['stimulus_duration'])
                self.assertEqual(write_record['stimulus_type'],
                                 read_record['stimulus_type'])
                self.assertEqual(write_record['patient_response'],
                                 read_record['patient_response'])

            if write_record['type'] == 'ESti':
                self.assertEqual(write_record['amplitude'],
                                 read_record['amplitude'])
                self.assertEqual(write_record['frequency'],
                                 read_record['frequency'])
                self.assertEqual(write_record['pulse_width'],
                                 read_record['pulse_width'])
                self.assertEqual(write_record['ampunit_code'],
                                 read_record['ampunit_code'])
                self.assertEqual(write_record['mode_code'],
                                 read_record['mode_code'])
                self.assertEqual(write_record['waveform'],
                                 read_record['waveform'])
                self.assertEqual(write_record['anode'],
                                 read_record['anode'])
                self.assertEqual(write_record['catode'],
                                 read_record['catode'])

            if write_record['type'] == 'Curs':
                self.assertEqual(write_record['id_number'],
                                 read_record['id_number'])
                self.assertEqual(write_record['trace_timestamp'],
                                 read_record['trace_timestamp'])
                self.assertEqual(write_record['latency'],
                                 read_record['latency'])
                self.assertEqual(write_record['value'],
                                 read_record['value'])
                self.assertEqual(write_record['name'],
                                 read_record['name'])

            if write_record['type'] == 'Epoc':
                self.assertEqual(write_record['id_number'],
                                 read_record['id_number'])
                self.assertEqual(write_record['timestamp'],
                                 read_record['timestamp'])
                self.assertEqual(write_record['end_timestamp'],
                                 read_record['end_timestamp'])
                self.assertEqual(write_record['duration'],
                                 read_record['duration'])
                self.assertEqual(write_record['epoch_type'],
                                 read_record['epoch_type'])
                self.assertEqual(write_record['text'],
                                 read_record['text'])

    def test_time_series_metadata_section_2_usr(self):
        segments = self.smd['time_series_channels']['ts_channel']['segments']
        seg_md = segments['ts_channel-000000']

        # User specified fields section 2
        section_2_usr_field_list = ['channel_description',
                                    'session_description',
                                    'reference_description',
                                    'acquisition_channel_number',
                                    'sampling_frequency',
                                    'notch_filter_frequency_setting',
                                    'low_frequency_filter_setting',
                                    'high_frequency_filter_setting',
                                    'AC_line_frequency',
                                    'units_conversion_factor',
                                    'units_description']

        for md2_user_key in section_2_usr_field_list:
            self.assertEqual(self.section2_ts_dict[md2_user_key],
                             seg_md['section_2'][md2_user_key][0])

    def test_time_series_metadata_section_2_auto(self):
        segments = self.smd['time_series_channels']['ts_channel']['segments']
        seg_md = segments['ts_channel-000000']

        # Fields that are created by C code during data writing
        max_sample_val = (np.max(self.raw_data_seg_1)
                          * self.section2_ts_dict['units_conversion_factor'])
        self.assertEqual(max_sample_val,
                         seg_md['section_2']['maximum_native_sample_value'][0])

        min_sample_val = (np.min(self.raw_data_seg_1)
                          * self.section2_ts_dict['units_conversion_factor'])
        self.assertEqual(min_sample_val,
                         seg_md['section_2']['minimum_native_sample_value'][0])

        # TODO: This field should be tested across segments
        self.assertEqual(self.section2_ts_dict['start_sample'],
                         seg_md['section_2']['start_sample'][0])

        N_blocks = np.ceil((self.sampling_frequency
                            * (self.secs_to_write + self.secs_to_append))
                           / self.samps_per_mef_block)
        self.assertEqual(N_blocks,
                         seg_md['section_2']['number_of_blocks'][0])

        self.assertEqual(self.samps_per_mef_block,
                         seg_md['section_2']['maximum_block_samples'][0])

        N_samples = (self.sampling_frequency
                     * (self.secs_to_write + self.secs_to_append))
        self.assertEqual(N_samples,
                         seg_md['section_2']['number_of_samples'][0])

        rec_duration = int((N_samples / self.sampling_frequency) * 1e6)
        self.assertEqual(rec_duration,
                         seg_md['section_2']['recording_duration'][0])

    def test_time_series_metadata_section_3(self):
        segments = self.smd['time_series_channels']['ts_channel']['segments']
        seg_md = segments['ts_channel-000000']
        for md3_key in self.section3_dict.keys():
            self.assertEqual(self.section3_dict[md3_key],
                             seg_md['section_3'][md3_key][0])

    def test_video_metadata_section_2(self):
        segments = self.smd['video_channels']['vid_channel']['segments']
        seg_md = segments['vid_channel-000000']
        for md2_key in self.section2_v_dict.keys():
            self.assertEqual(self.section2_v_dict[md2_key],
                             seg_md['section_2'][md2_key][0])

    def test_video_metadata_section_3(self):
        segments = self.smd['video_channels']['vid_channel']['segments']
        seg_md = segments['vid_channel-000000']
        for md3_key in self.section3_dict.keys():
            self.assertEqual(self.section3_dict[md3_key],
                             seg_md['section_3'][md3_key][0])

    def test_time_series_data(self):
        read_data = self.ms.read_ts_channels_sample(self.ts_channel,
                                                    [None, None])
        # Check the sums
        self.assertEqual(np.sum(self.raw_data_all), np.sum(read_data))

    # ----- Data reading tests -----

    # Reading by sample
    def test_start_sample_out_of_file(self):
        ch_md = self.smd['time_series_channels']['ts_channel']
        ch_md2 = ch_md['section_2']
        start = int(ch_md2['start_sample'] - self.samps_per_mef_block)
        end = int(ch_md2['start_sample'] + self.samps_per_mef_block)
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            warning_text = ('Start sample smaller than 0. '
                            'Setting start sample to 0')
            self.ms.read_ts_channels_sample(self.ts_channel, [start, end])
            self.assertEqual(len(w), 1)
            self.assertEqual(warning_text, str(w[-1].message))
            assert issubclass(w[-1].category, RuntimeWarning)

    def test_end_sample_out_of_file(self):
        ch_md = self.smd['time_series_channels']['ts_channel']
        ch_md2 = ch_md['section_2']
        start = int(ch_md2['number_of_samples'] - self.samps_per_mef_block)
        end = int(ch_md2['number_of_samples'] + self.samps_per_mef_block)
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            warning_text = ('Stop sample larger than number of samples. '
                            'Setting end sample to number of samples in '
                            'channel')
            self.ms.read_ts_channels_sample(self.ts_channel, [start, end])
            self.assertEqual(len(w), 1)
            self.assertEqual(warning_text, str(w[-1].message))
            assert issubclass(w[-1].category, RuntimeWarning)

    def test_start_end_sample_before_file(self):
        ch_md = self.smd['time_series_channels']['ts_channel']
        ch_md2 = ch_md['section_2']
        start = int(ch_md2['start_sample'] - (self.samps_per_mef_block * 2))
        end = int(ch_md2['start_sample'] - self.samps_per_mef_block)
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            warning_text = ('Start and stop samples are out of file. '
                            'Returning None')
            self.ms.read_ts_channels_sample(self.ts_channel, [start, end])
            self.assertEqual(len(w), 1)
            self.assertEqual(warning_text, str(w[-1].message))
            assert issubclass(w[-1].category, RuntimeWarning)

    def test_start_end_sample_after_file(self):
        ch_md = self.smd['time_series_channels']['ts_channel']
        ch_md2 = ch_md['section_2']
        start = int(ch_md2['number_of_samples'] + self.samps_per_mef_block)
        end = int(ch_md2['number_of_samples']
                  + (self.samps_per_mef_block * 2))
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            warning_text = ('Start and stop samples are out of file. '
                            'Returning None')
            self.ms.read_ts_channels_sample(self.ts_channel, [start, end])
            self.assertEqual(len(w), 1)
            self.assertEqual(warning_text, str(w[-1].message))
            assert issubclass(w[-1].category, RuntimeWarning)

    def test_start_sample_bigger_than_end_sample(self):
        error_text = 'Start sample larger than end sample, exiting...'
        try:
            self.ms.read_ts_channels_sample(self.ts_channel, [10, 5])
        except Exception as e:
            self.assertEqual(error_text, str(e))

    # Reading by uutc
    def test_start_uutc_out_of_file(self):
        ch_md = self.smd['time_series_channels']['ts_channel']
        ch_md_spec = ch_md['channel_specific_metadata']
        start = int(ch_md_spec['earliest_start_time'] - 1e6)
        end = int(ch_md_spec['earliest_start_time'] + 1e6)
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            warning_text = ('Start uutc earlier than earliest start time. '
                            'Will insert NaNs')
            self.ms.read_ts_channels_uutc(self.ts_channel, [start, end])
            self.assertEqual(len(w), 1)
            self.assertEqual(warning_text, str(w[-1].message))
            assert issubclass(w[-1].category, RuntimeWarning)

    def test_end_uutc_out_of_file(self):
        ch_md = self.smd['time_series_channels']['ts_channel']
        ch_md_spec = ch_md['channel_specific_metadata']
        start = int(ch_md_spec['latest_end_time'] - 1e6)
        end = int(ch_md_spec['latest_end_time'] + 1e6)
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            warning_text = ('Stop uutc later than latest end time. '
                            'Will insert NaNs')
            self.ms.read_ts_channels_uutc(self.ts_channel, [start, end])
            self.assertEqual(len(w), 1)
            self.assertEqual(warning_text, str(w[-1].message))
            assert issubclass(w[-1].category, RuntimeWarning)

    def test_start_end_uutc_before_file(self):
        ch_md = self.smd['time_series_channels']['ts_channel']
        ch_md_spec = ch_md['channel_specific_metadata']
        start = int(ch_md_spec['earliest_start_time'] - (1e6 * 2))
        end = int(ch_md_spec['earliest_start_time'] - 1e6)
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            warning_text = ('Start and stop times are out of file. '
                            'Returning None')
            self.ms.read_ts_channels_uutc(self.ts_channel, [start, end])
            self.assertEqual(len(w), 1)
            self.assertEqual(warning_text, str(w[-1].message))
            assert issubclass(w[-1].category, RuntimeWarning)

    def test_start_end_uutc_after_file(self):
        ch_md = self.smd['time_series_channels']['ts_channel']
        ch_md_spec = ch_md['channel_specific_metadata']
        start = int(ch_md_spec['latest_end_time'] + 1e6)
        end = int(ch_md_spec['latest_end_time'] + (1e6 * 2))
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            warning_text = ('Start and stop times are out of file. '
                            'Returning None')
            self.ms.read_ts_channels_uutc(self.ts_channel, [start, end])
            self.assertEqual(len(w), 1)
            self.assertEqual(warning_text, str(w[-1].message))
            assert issubclass(w[-1].category, RuntimeWarning)

    def test_start_uutc_in_discontinuity(self):
        discont_end_time = int(self.end_time + (1e6*self.secs_to_append)
                               + int(1e6*self.discont_length))
        start = int(discont_end_time - 5e5)
        end = int(discont_end_time + 5e5)
        data = self.ms.read_ts_channels_uutc(self.ts_channel, [start, end])
        # Check for the number of NaNs
        N_nans = (5e5 / 1e6) * self.sampling_frequency
        read_N_nans = np.sum(np.isnan(data))
        self.assertEqual(N_nans, read_N_nans)

    def test_end_uutc_in_discontinuity(self):
        discont_start_time = (int(self.end_time)
                              + int(1e6*self.secs_to_append))
        start = int(discont_start_time - 5e5)
        end = int(discont_start_time + 5e5)
        data = self.ms.read_ts_channels_uutc(self.ts_channel, [start, end])
        # Check for the number of NaNs
        N_nans = (5e5 / 1e6) * self.sampling_frequency
        read_N_nans = np.sum(np.isnan(data))
        self.assertEqual(N_nans, read_N_nans)

    def test_start_uutc_bigger_than_end_uutc(self):
        error_text = 'Start time later than end time, exiting...'
        try:
            self.ms.read_ts_channels_uutc(self.ts_channel,
                                          [self.start_time + int(2*1e6),
                                           self.start_time + int(1*1e6)])
        except Exception as e:
            self.assertEqual(error_text, str(e))

    def test_append_nonexistent_segment(self):
        error_text = ("Data file '" + self.ms.path
                      + "ts_channel.timd/ts_channel-000005.segd/"
                        "ts_channel-000005.tdat' does not exist!")
        append_start = self.end_time + (self.secs_to_append * 5 * 1e6)
        append_stop = int(append_start + (self.secs_to_append * 1e6))
        try:
            self.ms.append_mef_ts_segment_data(self.ts_channel, 5,
                                               self.pwd_1, self.pwd_2,
                                               append_start, append_stop,
                                               self.samps_per_mef_block,
                                               self.raw_data_to_append)
        except Exception as e:
            self.assertEqual(error_text, str(e))

    # ----- Pymef helpers -----
    def test_wrong_password(self):
        ts_metadata_file = self.ts_seg1_path + '/ts_channel-000000.tmet'
        result = pymef3_file.check_mef_password(ts_metadata_file, 'bu')
        self.assertEqual(-1, result)

    def test_level_1_password(self):
        ts_metadata_file = self.ts_seg1_path + '/ts_channel-000000.tmet'
        result = pymef3_file.check_mef_password(ts_metadata_file, self.pwd_1)
        self.assertEqual(1, result)

    def test_level_2_password(self):
        ts_metadata_file = self.ts_seg1_path + '/ts_channel-000000.tmet'
        result = pymef3_file.check_mef_password(ts_metadata_file, self.pwd_2)
        self.assertEqual(2, result)
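# Standard unittest entry point so the suite above can also be run directly
# as a script, assuming this class lives in its own test module; a test
# runner such as pytest will discover the tests either way.
if __name__ == '__main__':
    unittest.main()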
class MefWriter: """ MefWriter class is a high level util class for easy mef3 data writing. """ __version__ = '2.0.0' def __init__(self, session_path, overwrite=False, password1=None, password2=None, verbose=False): self.pwd1 = password1 self.pwd2 = password2 self.bi = None self.channel_info = {} # ------- properties ------ self._mef_block_len = None self._record_offset = 0 self.verbose = verbose # maximal nans in continuous block to be stored in data and not indexed self._max_nans_written = 'fs' # units of data stored self._data_units = b'uV' # from pymef library self.section3_dict = { 'recording_time_offset': np.nan, 'DST_start_time': 0, 'DST_end_time': 0, 'GMT_offset': -6 * 3600, 'subject_name_1': b'none', 'subject_name_2': b'none', 'subject_ID': b'None', 'recording_location': b'P' } self.section2_ts_dict = { 'channel_description': b'ts_channel', 'session_description': b'ts_session', 'recording_duration': np.nan, 'reference_description': b'None', 'acquisition_channel_number': 1, 'sampling_frequency': np.nan, 'notch_filter_frequency_setting': 0, 'low_frequency_filter_setting': 1, 'high_frequency_filter_setting': 10, 'AC_line_frequency': 0, 'units_conversion_factor': 1.0, 'units_description': copy(self._data_units), 'maximum_native_sample_value': 0.0, 'minimum_native_sample_value': 0.0, 'start_sample': 0, # Different for segments 'number_of_blocks': 0, 'maximum_block_bytes': 0, 'maximum_block_samples': 0, 'maximum_difference_bytes': 0, 'block_interval': 0, 'number_of_discontinuities': 0, 'maximum_contiguous_blocks': 0, 'maximum_contiguous_block_bytes': 0, 'maximum_contiguous_samples': 0, 'number_of_samples': 0 } if overwrite is True: if os.path.exists(session_path): rmtree(session_path) time.sleep( 3 ) # wait till all files are gone. Problems when many files, especially on a network drive self.session = MefSession(session_path, password2, False, True) else: if os.path.exists(session_path): self.session = MefSession(session_path, password2, False, False) self._reload_session_info() else: self.session = MefSession(session_path, password2, False, True) def __del__(self): self.session.close() def _reload_session_info(self): self.session.reload() self.bi = self.session.read_ts_channel_basic_info() self.channel_info = {info['name']: deepcopy(info) for info in self.bi} for ch in self.channel_info.keys(): self.channel_info[ch]['n_segments'] = len( self.session.session_md['time_series_channels'][ch] ['segments']) self.channel_info[ch]['mef_block_len'] = int64( self.get_mefblock_len(self.channel_info[ch]['fsamp'][0])) def write_data(self, data_write, channel, start_uutc, sampling_freq, end_uutc=None, precision=None, new_segment=False, discont_handler=True, reload_metadata=True): """ General method for writing any data to the session. Method handles new channel data or appending to existing channel data automatically. Discont handler flag can be used for fragmentation to smaller intervals which are written in sequence with nans intervals skipped. Parameters ---------- data_write : np.ndarray data to be written, data will be scaled a translated to int32 automatically if precision parameter is not given channel : str name of the stored channel start_uutc : int64 uutc timestamp of the first sample sampling_freq : float only 0.1 Hz resolution is tested end_uutc : int, optional end of the data uutc timestamp, if less data is provided than end_uutc - start_uutc nans gap will be inserted to the data precision : int, optional Number of floating point to be scaled above zero. 
Data are multiplied by 10**precision before writing and scale factor is stored in metadata. used for transforming data to int32, can be positive or 0 = no change in scale, only loss of decimals. new_segment : bool, optional if new mef3 segment should be created discont_handler: bool, optional disconnected segments will be stored in intervals if the gap in data is higher than max_nans_written property reload_metadata: bool, optional default: true. Parameter Controls reloading of metadata after writing new data - frequent call of write method on short signals can slow down writing. When false appending is not protected for correct endtime check, but data write is faster. Metadata are always reloaded with new segment. Returns ------- out : bool True on success """ # infer end_uutc from data if end_uutc is None: end_uutc = int64(start_uutc + (len(data_write) / sampling_freq * 1e6)) # check times are correct if end_uutc < start_uutc: print( f"WARNING: incorrect End uutc time {end_uutc} is before beginning: {start_uutc}" ) return None # check if any data exists -> apend or create new segment if channel in self.channel_info.keys(): # check if it is possible to write with configuration provided if start_uutc < self.channel_info[channel]['end_time'][0]: print( ' Given start time is before end time of data already written to the session. Returning None' ) return None # NOTE fs can be different in the new segment but we dont work with different fs in the same channel if sampling_freq != self.channel_info[channel]['fsamp'][0]: print( ' Sampling frequency of provided data does not match fs of already written data' ) return None # read precision from metadata - scale factor / can be different in new segment but not implemented precision = int(-1 * np.log10(self.channel_info[channel]['ufact'][0])) # convert data to int32 data_converted = convert_data_to_int32(data_write, precision=precision) # check new segment flag segment = self.channel_info[channel]['n_segments'] # new channel data with no previous data else: segment = 0 new_segment = True if precision is None: print('WARNING: precision is not specified, infering...') precision = infer_conversion_factor(data_write) print(f'INFO: precision set to {precision}') ufact = np.round(0.1**precision, precision) # convert data to int32 self.channel_info[channel] = { 'mef_block_len': self.get_mefblock_len(sampling_freq), 'ufact': [ufact] } data_converted = convert_data_to_int32(data_write, precision=precision) # discont handler writes fragmented intervals ( skip nans greater than specified) if discont_handler: if self.max_nans_written == 'fs': max_nans = int(sampling_freq) else: max_nans = self.max_nans_written input_bin_vector = ~np.isnan(data_write) df_intervals = find_intervals_binary_vector( input_bin_vector, sampling_freq, start_uutc, samples_of_nans_allowed=max_nans) else: df_intervals = pd.DataFrame(data={ 'start_samples': 0, 'stop_samples': len(data_converted), 'start_uutc': start_uutc, 'stop_uutc': end_uutc }, index=[0]) print( f'INFO: total number of intervals to be written: {len(df_intervals)}' ) print(f'Running...') if new_segment: for i, row in df_intervals.iterrows(): data_part = data_converted[ row['start_samples']:row['stop_samples']] if i == 0: self._create_segment(data=data_part, channel=channel, start_uutc=row['start_uutc'], end_uutc=row['stop_uutc'], sampling_frequency=sampling_freq, segment=segment) else: self._append_block(data=data_part, channel=channel, start_uutc=row['start_uutc'], end_uutc=row['stop_uutc'], segment=segment) 
reload_metadata = True # append to a last segment else: segment -= 1 for i, row in df_intervals.iterrows(): data_part = data_converted[ row['start_samples']:row['stop_samples']] self._append_block(data=data_part, channel=channel, start_uutc=row['start_uutc'], end_uutc=row['stop_uutc'], segment=segment) if reload_metadata: self._reload_session_info() print('INFO: data write method finished.') return True def write_annotations(self, annotations, channel=None): """ Method writes annotations to a session/channel. Method handles new annotations or appending to existing annotations. Input data has to have required structure. Parameters ---------- annotations : pandas.DataFrame DataFrame has to have a proper structure with columns - time column [uutctimestamp], type ['str specified in pymef' - Note or EDFA], text ['str'], optional duration [usec] channel : str, optional annotations are written at the channel level """ # check int of time column if not np.issubdtype(annotations['time'].dtype, np.int64): annotations['time'] = annotations['time'].astype(np.int64) # check duration for int if 'duration' in annotations.columns: if not np.issubdtype(annotations['duration'].dtype, np.int64): annotations['duration'] = annotations['duration'].astype( np.int64) start_time = annotations['time'].min() end_time = annotations['time'].max() record_list = annotations.to_dict('records') # read old annotations print(' Reading previously stored annotations...') previous_list = self._read_annotation_record(channel=channel) if previous_list is not None: read_annotations = pd.DataFrame(previous_list) read_start = read_annotations['time'].min() read_end = read_annotations['time'].max() if read_start < start_time: start_time = read_start if read_end > end_time: end_time = read_end record_list.extend(previous_list) self._write_annotation_record(start_time, end_time, record_list, channel=channel) print( f'Annotations written, total {len(record_list)}, channel: {channel}' ) return def _write_annotation_record(self, start_time, end_time, record_list, channel=None): record_offset = self.record_offset if channel is None: self.session.write_mef_records(self.pwd1, self.pwd2, start_time, end_time, record_offset, record_list) else: self.session.write_mef_records(self.pwd1, self.pwd2, start_time, end_time, record_offset, record_list, channel=channel) self.session.reload() def _read_annotation_record(self, channel=None): try: annot_list = None if channel is None: annot_list = self.session.read_records() else: annot_list = self.session.read_records(channel=channel) except TypeError as exc: print( 'WARNING: read of annotations record failed, no annotations returned' ) except KeyError as exc: print( 'WARNING: read of annotations record failed, no annotations returned' ) return annot_list def _create_segment( self, data=None, channel=None, start_uutc=None, end_uutc=None, sampling_frequency=None, segment=0, ): if data.dtype != np.int32: raise AssertionError( '[TYPE ERROR] - MEF file writer accepts only int32 signal datatype!' 
    def _write_annotation_record(self, start_time, end_time, record_list,
                                 channel=None):
        record_offset = self.record_offset
        if channel is None:
            self.session.write_mef_records(self.pwd1, self.pwd2, start_time,
                                           end_time, record_offset,
                                           record_list)
        else:
            self.session.write_mef_records(self.pwd1, self.pwd2, start_time,
                                           end_time, record_offset,
                                           record_list, channel=channel)
        self.session.reload()

    def _read_annotation_record(self, channel=None):
        annot_list = None
        try:
            if channel is None:
                annot_list = self.session.read_records()
            else:
                annot_list = self.session.read_records(channel=channel)
        except (TypeError, KeyError):
            print('WARNING: read of the annotation record failed,'
                  ' no annotations returned')
        return annot_list

    def _create_segment(self, data=None, channel=None, start_uutc=None,
                        end_uutc=None, sampling_frequency=None, segment=0):
        if data.dtype != np.int32:
            raise AssertionError('[TYPE ERROR] - MEF file writer accepts'
                                 ' only int32 signal datatype!')
        if end_uutc < start_uutc:
            raise ValueError('End uutc timestamp lower than the start_uutc')
        self.section2_ts_dict['sampling_frequency'] = sampling_frequency

        # default values for segment 0
        if segment == 0:
            self.section3_dict['recording_time_offset'] = self.record_offset
            self.section2_ts_dict['start_sample'] = 0
        else:
            self.section3_dict['recording_time_offset'] = self.record_offset
            self.section2_ts_dict['start_sample'] = int64(
                self.channel_info[channel]['nsamp'][0])

        self.section2_ts_dict['recording_duration'] = int64(
            (end_uutc - start_uutc) / 1e6)
        self.section2_ts_dict['units_conversion_factor'] = self.channel_info[
            channel]['ufact'][0]
        print(f"INFO: creating new segment data for channel: {channel},"
              f" segment: {segment}, fs: {sampling_frequency}, ufac:"
              f" {self.channel_info[channel]['ufact'][0]},"
              f" start: {start_uutc}, stop: {end_uutc}")
        self.session.write_mef_ts_segment_metadata(channel, segment,
                                                   self.pwd1, self.pwd2,
                                                   start_uutc, end_uutc,
                                                   dict(self.section2_ts_dict),
                                                   dict(self.section3_dict))
        self.session.write_mef_ts_segment_data(
            channel, segment, self.pwd1, self.pwd2,
            self.channel_info[channel]['mef_block_len'], data)

    def _append_block(self, data=None, channel=None, start_uutc=None,
                      end_uutc=None, segment=0):
        if end_uutc < start_uutc:
            raise ValueError('End uutc timestamp lower than the start_uutc')
        if self.verbose:
            print(f"INFO: appending new data for channel: {channel},"
                  f" segment: {segment}, ufac:"
                  f" {self.channel_info[channel]['ufact'][0]},"
                  f" start: {start_uutc}, stop: {end_uutc}")
        self.session.append_mef_ts_segment_data(
            channel, int64(segment), self.pwd1, self.pwd2, start_uutc,
            end_uutc, self.channel_info[channel]['mef_block_len'], data)

    def get_mefblock_len(self, fs):
        if self.mef_block_len is not None:
            return self.mef_block_len
        # fs is assumed to be positive: high sampling rates get one-second
        # blocks, lower rates ten-second blocks
        if fs >= 5000:
            return int(fs)
        return int(fs * 10)

    @property
    def max_nans_written(self):
        return self._max_nans_written

    @max_nans_written.setter
    def max_nans_written(self, max_samples):
        if not isinstance(max_samples, int) or max_samples < 0:
            print("incorrect value, please provide a non-negative int")
            return
        self._max_nans_written = max_samples

    @property
    def data_units(self):
        return self._data_units

    @data_units.setter
    def data_units(self, units_str):
        if not isinstance(units_str, str) or len(units_str) >= 20:
            print("incorrect value, please provide a str with fewer"
                  " than 20 chars")
            return
        self._data_units = str.encode(units_str, 'utf-8')
        self.section2_ts_dict['units_description'] = copy(self._data_units)

    @property
    def record_offset(self):
        return self._record_offset

    @record_offset.setter
    def record_offset(self, new_offset):
        self._record_offset = new_offset

    @property
    def mef_block_len(self):
        return self._mef_block_len

    @mef_block_len.setter
    def mef_block_len(self, new_mefblock_len):
        self._mef_block_len = new_mefblock_len
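
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the original source): how the
# writer API above might be driven end to end. Neither the enclosing writer
# class name nor the write method name is shown in this excerpt, so both
# `MefWriter` and `write_data` below are assumptions; the keyword argument
# names follow the variables used in the write method body above.
#
#   import numpy as np
#   import pandas as pd
#
#   fs = 500.0                                  # sampling frequency [Hz]
#   start_uutc = 1600000000000000               # uUTC times are microseconds
#   data = np.random.randn(int(fs) * 10)        # 10 s of synthetic signal
#
#   writer = MefWriter('example_session.mefd')  # hypothetical constructor
#   writer.write_data(data_write=data, channel='chan_01',
#                     start_uutc=start_uutc, sampling_freq=fs, precision=3)
#
#   notes = pd.DataFrame({'time': [start_uutc + 2000000],
#                         'type': ['Note'],
#                         'text': ['stimulation on']})
#   writer.write_annotations(notes, channel='chan_01')
# ---------------------------------------------------------------------------
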
class mefdHandler(FileDataSource):
    def __init__(self):
        super(mefdHandler, self).__init__()

        self.name = 'Mef session'
        self.extension = '.mefd'

        self.session = None

    def password_check(self, password):
        try:
            if self.session is None:
                self.session = MefSession(self._path, password, False,
                                          check_all_passwords=False)
            return True
        except RuntimeError:
            return False

    def load_metadata(self):
        if self.session is None or self.session.session_md is None:
            self.session = MefSession(self._path, self._password,
                                      check_all_passwords=False)

        # Get information about the recording
        s_md = self.session.session_md['session_specific_metadata']
        ts_md = self.session.session_md['time_series_metadata']['section_2']
        ts_ch = self.session.session_md['time_series_channels']
        channel_list = list(ts_ch.keys())
        channel_list.sort()

        self.recording_info = {}
        self.recording_info['recording_start'] = s_md['earliest_start_time'][0]
        self.recording_info['recording_end'] = s_md['latest_end_time'][0]
        self.recording_info['recording_duration'] = (
            ts_md['recording_duration'][0])
        self.recording_info['extension'] = '.mefd'
        self.recording_info['nchan'] = len(channel_list)

        dmap = np.zeros(len(channel_list),
                        dtype=[('fsamp', np.float64),
                               ('nsamp', np.int32),
                               ('ufact', np.float64),
                               ('unit', object),
                               ('channels', object),
                               ('discontinuities', np.ndarray),
                               ('ch_set', bool),
                               ('uutc_ss', np.int64, 2)])

        for i, channel in enumerate(channel_list):
            channel_md = ts_ch[channel]
            fsamp = channel_md['section_2']['sampling_frequency'][0]
            nsamp = channel_md['section_2']['number_of_samples'][0]
            ufact = channel_md['section_2']['units_conversion_factor'][0]
            unit = channel_md['section_2']['units_description'][0]
            unit = unit.decode("utf-8")

            channel_spec_md = channel_md['channel_specific_metadata']
            start_time = channel_spec_md['earliest_start_time'][0]
            end_time = channel_spec_md['latest_end_time'][0]

            # derive discontinuity intervals from the channel's table of
            # contents
            toc = self.session.get_channel_toc(channel)
            disc_stops = toc[3, toc[0] == 1]
            disc_starts = disc_stops - toc[1, toc[0] == 1]
            disconts = np.c_[disc_starts, disc_stops]

            dmap[i] = (fsamp, nsamp, ufact, unit, channel, disconts, True,
                       [start_time, end_time])

        self.data_map.setup_data_map(dmap)
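
    # -----------------------------------------------------------------------
    # Illustrative sketch (not part of the original source): the data map
    # built above is a NumPy structured array, so per-channel metadata can be
    # pulled out by field name. The channel name used here is hypothetical.
    #
    #   dmap['fsamp']                        # sampling rate of every channel
    #   dmap['channels'] == 'chan_01'        # boolean mask for one channel
    #   dmap[dmap['fsamp'] >= 5000.0]        # rows of high-rate channels
    # -----------------------------------------------------------------------
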
""" # Basic annotation columns basic_cols = ['start_time', 'end_time', 'channel'] dfs_out = {} for entry in records_list: rec_header = entry['record_header'] rec_body = entry['record_body'] rec_type = rec_header['type_string'][0] rec_type = rec_type.decode("utf-8") if rec_type not in dfs_out: ann_cols = [x[0] for x in rec_body.dtype.descr] cols = basic_cols + ann_cols dfs_out[rec_type] = pd.DataFrame(columns=cols) df = dfs_out[rec_type] ei = len(df) col_vals = {'start_time': rec_header['time'][0], 'end_time': np.nan, 'channel': np.nan} col_vals.update( dict([(x[0], rec_body[x[0]][0]) for x in rec_body.dtype.descr])) # Convert byte strings to normal strings for key, val in col_vals.items(): if type(val) == np.bytes_: col_vals[key] = (val.decode("utf-8")) df.loc[ei] = col_vals return dfs_out def get_annotations(self): """ Returns: -------- Annotations - in form of pandas DataFrame(s) """ if self.session is None or self.session.session_md is None: self.session = MefSession(self._path, self._password) dfs_out = {} session_md = self.session.session_md # Get session level records if 'records_info' in session_md.keys(): session_records = session_md['records_info']['records'] dfs_out.update(self._process_mef_records(session_records)) # Get channel level records for _, channel_d in session_md['time_series_channels'].items(): if 'records_info' in channel_d.keys(): ch_rec_list = channel_d['records_info']['records'] else: ch_rec_list = [] # Get segment level records for segment_d in channel_d['segments'].values(): if 'records_info' in segment_d.keys(): ch_rec_list += segment_d['records_info']['records'] dfs_out.update(self._process_mef_records(ch_rec_list)) return dfs_out def get_data(self, data_map): """ Parameters: ----------- data_map - DataMap instance for loading Returns: -------- The data in a list specified by channel_map """ channel_map = data_map.get_active_channels() uutc_map = data_map.get_active_uutc_ss() data = self.session.read_ts_channels_uutc(channel_map, uutc_map, out_nans=True) data_out = np.empty(len(data_map), object) for i in range(len(data_map)): data_out[i] = np.array([], dtype='float32') for i, ch in enumerate(channel_map): ch_pos = np.argwhere(data_map['channels'] == ch)[0][0] data_out[ch_pos] = data[i] return data_out