def spfilt( dataset, filt, io_blocksize = 1024, progress_bar = False ):
    """
    Apply a spatial filter to a two dimensional dataset, block by block.

    The dataset is modified in place: every block of rows is replaced by
    its matrix product with filt.

    Returns dataset (the same object that was passed in).

    Keyword Arguments:
    dataset -- ndarray of size samples x channels
    filt -- ndarray spatial filter of channels x channels
    io_blocksize -- Number of samples to operate on at a time
        (Default: 1024)
    progress_bar -- If set to true, show a progress bar.
        (Default: False)

    Raises ValueError if io_blocksize is smaller than 1.

    TODO: Implement Parallelism
    """
    if io_blocksize < 1:
        raise ValueError( 'io_blocksize must be at least 1' )

    nsamp = len( dataset )

    pbar = None
    if progress_bar:
        from progress import ProgressBar
        pbar = ProgressBar( nsamp )

    for start in range( 0, nsamp, io_blocksize ):
        # Slice ends are exclusive, so the final block has to be clamped to
        # nsamp; clamping to nsamp - 1 would leave the last sample unfiltered.
        end = min( start + io_blocksize, nsamp )
        dataset[ start:end, : ] = np.dot( dataset[ start:end, : ], filt )
        if pbar is not None: pbar.animate( end )

    return dataset
def resample_lts( lts, out_lts, new_rate = 5000, cutoff = 2000 ):
    """
    Resample every channel of a LabeledTimeseries into another one.

    Returns the LabeledTimeseries that received the resampled data.

    Keyword Arguments:
    lts -- The input LabeledTimeseries to resample
    out_lts -- The LabeledTimeseries that receives the resampled data.
        When a string is given instead, a new LabeledTimeseries with that
        name is created in the parent group of lts' dataset.
    new_rate -- The sampling rate to resample to.
        (Default: 5000 samples per sec)
    cutoff -- The cutoff value for the FIR low-pass filter. Ensure this
        is set considerably *below* the nyquist of 'new_rate'.
        (Default: 2000 Hz)
    """
    from h5eeg import LabeledTimeseries

    # A string destination names a timeseries that still has to be created;
    # its length is the input length scaled by the ratio of the two rates.
    if isinstance( out_lts, basestring ):
        destination_group = lts.dataset.parent
        rate_ratio = float( new_rate ) / float( lts.get_rate() )
        scaled_length = int( len( lts ) * rate_ratio )
        out_lts = LabeledTimeseries.create( destination_group,
                                            scaled_length,
                                            lts.get_labels(),
                                            new_rate,
                                            name = out_lts )

    # Resample one channel at a time
    progress = ProgressBar( len( lts.get_labels() ) )
    for ch_index, label in enumerate( lts.get_labels() ):
        print( 'Resampling Channel: %s...' % label )
        resampled = resample( lts[ :, label ], lts.get_rate(), new_rate, cutoff )

        # Copy only as many samples as both sides can hold
        n_copy = min( len( resampled ), len( out_lts ) )
        out_lts[ 0:n_copy, label ] = resampled[ 0:n_copy ]
        progress.animate( ch_index )

    # NOTE: lts.dataset.file.flush() is deliberately not called here;
    # it takes forever.
    return out_lts
def convert_bcistream( dat, h5filename = None, overwrite = False ):
    """
    Convert a BCI2000 data stream into an hdf5 file.

    Returns the open hdf5 file object, or None when the target file
    already exists and overwrite is not set.

    Keyword Arguments:
    dat -- The input stream object. Its filename, params, datestamp,
        offsets, gains, statedefs and decode()/seek()/tell() are used.
    h5filename -- Path of the hdf5 file to write.
        (Default: None -- the input filename with its extension replaced
        by '.hdf5', in the same directory)
    overwrite -- Replace an existing file of the same name.
        (Default: False)
    """
    # Without an explicit target, derive one from the source filename
    if h5filename is None:
        folder, base_name = os.path.split( dat.filename )
        stem = os.path.splitext( base_name )[0]
        h5filename = os.path.join( folder, stem + '.hdf5' )

    if os.path.isfile( h5filename ) and not overwrite:
        print( "Error: %s exists already. Not overwriting." % h5filename )
        return None

    # Create the required group and set group attributes
    # NOTE: BCI2000 file format has no good record of experiment
    outfile = h5.File( h5filename, 'w' )
    group = H5EEGGroup.create( outfile,
                               subject = dat.params[ 'SubjectName' ],
                               timestamp = dat.datestamp )

    # Channel labels come from the parameters when available, otherwise
    # channels are simply numbered starting at 1
    if 'ChannelNames' in dat.params.keys():
        eeg_labels = dat.params[ 'ChannelNames' ]
    else:
        eeg_labels = [ str( ch + 1 ) for ch in range( dat.nchan ) ]

    # Create the EEG, AUX, and Event datasets
    eeg_offsets = dat.offsets.astype( 'int32' )
    H5EEGDataset.create( group, dat.samples(), eeg_labels, eeg_offsets, dat.gains,
                         name = 'raw',
                         rate = dat.samplingrate(),
                         bytes_per_sample = dat.bytesperchannel )

    aux_labels = dat.statedefs.keys()
    H5EEGAuxDataset.create( group, dat.samples(),
                            labels = aux_labels,
                            rate = dat.samplingrate() )

    H5EEGEvents.create( group )

    # Copy the data over, requesting one second worth of samples per read
    dat.seek( 0 )
    eeg_out = group.eeg().dataset
    aux_out = group.aux().dataset

    print( 'Converting %s to %s...' % ( dat.filename, h5filename ) )
    progress = ProgressBar( dat.samples() )
    while dat.tell() != dat.samples():
        first = dat.tell()
        requested = int( dat.samplingrate() )
        signal, states = dat.decode( nsamp = requested, apply_gains = False )

        # The number of samples actually decoded is given by the signal shape
        last = first + signal.shape[1]

        eeg_out[ first:last, : ] = signal.T
        for column, state_name in enumerate( aux_labels ):
            aux_out[ first:last, column ] = np.squeeze( states[ state_name ] )

        progress.animate( dat.tell() )

    return outfile
def listen( data, rate ):
    """
    Listen to a one-dimensional numpy array.

    The data is scaled so that its largest absolute value becomes 1 and is
    written to a pyaudio output stream as 32-bit floats in blocks of about
    0.25 seconds. Interrupting with ^C stops playback early. The stream and
    the pyaudio handle are released even when playback fails.

    Keyword Arguments:
    data -- A one-dimensional numpy array (singleton dimensions are
        squeezed away). Empty data is accepted and plays nothing.
    rate -- The data sampling rate. The output stream is opened with
        int( rate ) as its rate.

    Raises ValueError if rate is not positive or if data still has more
    than one dimension after squeezing.

    Depends on pyaudio
    """
    import pyaudio

    rate = int( rate )
    if rate <= 0:
        raise ValueError( 'rate must be a positive number' )

    # atleast_1d keeps a single-sample input usable with len() and slicing
    samples = np.atleast_1d( np.squeeze( data ) )
    if samples.ndim != 1:
        raise ValueError( 'listen() expects one-dimensional data' )
    if len( samples ) == 0:
        return

    # Normalise in floating point so integer input is not truncated, and
    # leave all-zero data untouched instead of dividing by zero.
    samples = samples.astype( 'float64' )
    peak = np.max( np.abs( samples ) )
    if peak > 0: samples = samples / peak
    samples = samples.astype( 'float32' )

    # Read in 0.25 sec increments; never less than one sample per block,
    # otherwise rates below 4 would never advance.
    read_block = max( 1, rate // 4 )
    audio_length = len( samples ) / float( rate )

    p = pyaudio.PyAudio()
    try:
        stream = p.open( format = pyaudio.paFloat32, channels = 1,
                         rate = rate, output = True )
        try:
            pbar = ProgressBar( audio_length )
            print( 'Listening to data (%f sec)... Interrupt (^C) to stop.' % audio_length )
            try:
                for start in range( 0, len( samples ), read_block ):
                    pbar.animate( start / float( rate ) )
                    # NOTE: tostring() is the older spelling of tobytes()
                    stream.write( samples[ start:start + read_block ].tostring() )
            except KeyboardInterrupt:
                # Deliberate: ^C just ends playback early
                pass
            stream.stop_stream()
        finally:
            stream.close()
    finally:
        p.terminate()
def frequency_feature( lts, events, length = None, band = ( 70, 110 ), smooth = 10,
                       baseline_length = None, baseline_offset = 0,
                       baseline_features = False, progress_bar = False,
                       exclude_ch = () ):
    """
    Generate frequency band amplitude features from a LabeledTimeseries,
    one observation per event.

           <---------------------------------||||||||||||||||||||------->
    data =  [baseline_length][baseline_offset]| event[start_idx] |[length]
           <---------------------------------||||||||||||||||||||------->

    Returns: ( features, obs_labels, ch_labels, t_labels )
    features -- ndarray of frequency features
        ( obs (events) x ch x samples (time) )
    obs_labels -- labels of the observation axis of features
    ch_labels -- labels of the ch axis of features: the labels of lts
        minus the channels removed by exclude_ch
    t_labels -- time labels in seconds.

    Keyword Arguments:
    lts -- LabeledTimeseries object to use as the input dataset.
    events -- An ndarray of event objects to generate features for.
        Hint: use H5EEGEvents.query_events(...) for this!
    length -- Length of the feature in seconds. Used as the feature length
        for events whose duration is 0 (flag events); events with a
        non-zero duration use their own duration.
        NOTE(review): earlier documentation said this value overrides the
        event duration, but the code only ever used it as a fallback --
        confirm which behaviour is intended.
        (Default: None -- events with a duration of 0 are skipped)
    band -- Frequency band in Hz defined by a tuple ( cuton, cutoff ).
        Passed to the BCPy2000 hilbert function, whose 'amplitude'
        output is used as the feature.
        (Default: ( 70, 110 ) High Gamma)
    smooth -- Frequency in Hz used to derive the low-pass filter that
        smooths the feature output. Set to 0 for no additional smoothing
        (Default: 10 Hz)
    baseline_length -- The number of samples of 'baseline' period to
        collect before each event. Features are extracted from this data
        segment and define a per-electrode baseline distribution for
        Z-scoring.
        NOTE: Set baseline_length = 0 to disable baseline referencing.
        (Default: None -- baseline_length = length; if length is also
        None, baseline referencing is disabled)
    baseline_offset -- The number of samples to offset before the event
        start_idx before the baseline period ends. See diagram above for
        clarification.
        (Default: 0 -- No samples of baseline offset)
    baseline_features -- Append features for the baseline periods rather
        than referencing/zscoring the features by baseline.
        NOTE: This will double the number of features/observations
        (Default: False -- Do not append baseline features)
    progress_bar -- Show a progress bar.
        (Default: False)
    exclude_ch -- Sequence of strings; every channel whose label contains
        one of them is left out.
        (Default: () -- keep all channels)

    Depends on BCPy2000 hilbert. FIXME: Remove Dependency
    TODO: Parallelism
    """
    import BCPy2000.Paths
    from BCPy2000.SigTools.Basic import hilbert

    rate = lts.get_rate()

    # Take sampling rate into account. A missing length stays None so that
    # the documented default works instead of failing on None * rate.
    if length is not None: length = int( length * rate )
    if baseline_length is None:
        baseline_length = length if length is not None else 0

    # Extra samples on both sides of every epoch; they are cut off again
    # after the band amplitude has been computed.
    buffer_samples = int( 0.2 * rate )

    # Calculate filter coefficients
    # NOTE(review): if firwin is scipy.signal.firwin, cutoff is expressed
    # relative to Nyquist, so smooth / rate would be smooth / 2 Hz -- confirm.
    smooth = smooth / float( rate )
    smooth_coefs = None
    if smooth != 0.0:
        smooth_coefs = firwin( 64, cutoff = smooth, window = "hamming" )
        smooth_coefs = smooth_coefs / sum( smooth_coefs )     # Correct Gain

    # Determine channel indices
    channels = np.array( [ ch for ch in lts.get_labels()
                           if not any( q in ch for q in exclude_ch ) ] )

    features = []
    obs_labels = []

    def add_observation( name, feature ):
        # smooth_coefs is an ndarray, so test identity; '!= None' would be
        # evaluated element by element.
        if smooth_coefs is not None:
            feature = filtfilt( smooth_coefs, [1.0], feature, axis = 0 )
        features.append( feature.T )
        obs_labels.append( name )

    def trim_buffer( block ):
        # Explicit end index: block[ b:-b ] would be empty for b == 0
        return block[ buffer_samples:len( block ) - buffer_samples, : ]

    def band_amplitude( segment ):
        return hilbert( segment, band = band, return_dict = True,
                        samplingfreq_hz = rate )[ 'amplitude' ]

    # Calculate features for every event matching the query
    pbar = None
    if progress_bar:
        from progress import ProgressBar
        pbar = ProgressBar( len( events ) )

    for idx, event in enumerate( events ):

        # Acquire information about the event
        name = event['name']
        start_idx = event['start_idx']
        duration = event['duration']

        # Flag events (duration 0) fall back to length; skip the event if
        # that leaves us without a usable duration.
        if duration == 0: duration = length
        if duration is None: continue

        # Define start and end of feature extraction epoch
        # NOTE(review): start goes negative for events within
        # buffer_samples of the beginning of lts -- confirm how lts
        # indexing treats that.
        start = start_idx - buffer_samples
        end = start_idx + duration + buffer_samples

        # Acquire the data for this event and calculate the band amplitude
        obs_features = band_amplitude( lts[ start:end, channels ] )

        # Reference to baseline if we can
        if baseline_length != 0:

            # Acquire the baseline data
            baseline_start = start_idx - baseline_length - baseline_offset - buffer_samples
            baseline_end = start_idx - baseline_offset + buffer_samples
            obs_baseline = trim_buffer(
                band_amplitude( lts[ baseline_start:baseline_end, channels ] ) )

            if baseline_features:
                add_observation( 'BASELINE_' + name, obs_baseline )
            else:
                # Z-Score the features by the baseline distribution.
                # TODO: This isn't statistically sound, this assumes
                # independence in the time-series
                mu = np.mean( obs_baseline, axis = 0 )
                sigma = np.std( obs_baseline, axis = 0 )
                # mu and sigma hold one value per channel and broadcast
                # across the sample axis.
                obs_features = ( obs_features - mu ) / sigma

        # Add the features to the list of features
        add_observation( name, trim_buffer( obs_features ) )

        # Update the progress
        if pbar is not None: pbar.animate( idx )

    # Return the feature array
    features = np.array( features )
    t_labels = np.arange( features.shape[-1] ) / float( rate )

    return ( features, obs_labels, channels, t_labels )
def spectrogram( lts, channel, events, length = None, baseline_length = None,
                 baseline_offset = 0, nfft = 1024, shift = 10, progress_bar = False ):
    """
    Generate an average spectrogram across a set of events.
    Each event is transformed with specgram (FFT-based), converted to
    20 * log10, optionally Z-scored against a baseline period preceding
    the event, and the results are averaged.

           <---------------------------------||||||||||||||||||||------->
    data =  [baseline_length][baseline_offset]| event[start_idx] |[length]
           <---------------------------------||||||||||||||||||||------->

    Returns: ( avg_spec, f, t )
    avg_spec = Average spectrogram array
    f = frequency labels for axis 0 of avg_spec
    t = time labels for axis 1 of avg_spec
    ( f and t are the ones reported for the last event processed )

    Keyword Arguments:
    lts -- LabeledTimeseries object to use as the input dataset.
    channel -- The channel within lts to create the spectrogram from.
    events -- An ndarray of event objects to generate spectrograms for.
        Hint: use H5EEGEvents.query_events(...) for this!
    length -- Length of the spectrogram in samples. Used as the length
        for events whose duration is 0 (flag events); events with a
        non-zero duration use their own duration.
        NOTE(review): earlier documentation said this value overrides the
        event duration, but the code only ever used it as a fallback --
        confirm which behaviour is intended.
        (Default: None -- events with a duration of 0 are skipped)
    baseline_length -- The number of samples of 'baseline' period to
        collect before each event. This data segment will be transformed
        into a spectrogram from which a baseline distribution will be
        defined per-frequency bin for Z-scoring.
        NOTE: Set baseline_length = 0 to disable baseline referencing.
        (Default: None -- baseline_length = length. If length == None,
        use event duration)
    baseline_offset -- The number of samples to offset before the event
        start_idx before the baseline period ends. See diagram above for
        clarification.
        (Default: 0 -- No samples of baseline offset)
    nfft -- Length of fft window. Should be a power of two. Used for both
        baseline and spectrogram. Should be shorter than length and
        baseline_length.
        (Default: 1024)
    shift -- Time shift of fft windows in samples. overlap = nfft - shift.
        (Default: 10)
    progress_bar -- Show a progress bar.
        (Default: False)

    Raises Warning if no event produced a spectrogram.

    FIXME: Specify lengths in seconds, because we have a samplingrate.
    TODO: Parallelism
    """
    overlap = nfft - shift
    rate = lts.get_rate()
    if baseline_length is None: baseline_length = length

    def db_specgram( segment ):
        spec, f, t = specgram( segment, NFFT = nfft, Fs = rate, noverlap = overlap )
        return 20.0 * np.log10( spec ), f, t

    # Calculate a spectrogram for every event matching the query
    avg_spec = None
    f = t = None
    num_spectrograms = 0

    pbar = None
    if progress_bar:
        from progress import ProgressBar
        pbar = ProgressBar( len( events ) )

    for idx, event in enumerate( events ):

        # Acquire information about the event
        start_idx = event['start_idx']
        duration = event['duration']

        # Flag events (duration 0) fall back to length; skip the event if
        # that leaves us without a usable duration.
        if duration == 0: duration = length
        if duration is None: continue

        # Acquire the data for this spectrogram and calculate the spectrogram.
        spec, f, t = db_specgram( lts[ start_idx:start_idx + duration, channel ] )

        # Reference to baseline if we can
        if baseline_length != 0:

            # Acquire the baseline data
            b_len = duration if baseline_length is None else baseline_length
            baseline_start = start_idx - b_len - baseline_offset
            baseline_end = start_idx - baseline_offset
            baseline_data = lts[ baseline_start:baseline_end, channel ]

            # Calculate the baseline spectrogram and determine distribution
            baseline_spec = db_specgram( baseline_data )[0]
            mu = np.mean( baseline_spec, axis = 1 )
            sigma = np.std( baseline_spec, axis = 1 )

            # Z-Score the spectrogram by the baseline distribution.
            # mu and sigma hold one value per frequency bin (axis 0), so
            # they get a trailing axis to broadcast across time.
            spec = ( spec - mu[ :, np.newaxis ] ) / sigma[ :, np.newaxis ]

        # Add the spectrogram to the running sum. avg_spec is an ndarray
        # after the first event, so test identity; '== None' would be
        # evaluated element by element.
        if avg_spec is None:
            avg_spec = spec
        else:
            avg_spec = avg_spec + spec
        num_spectrograms += 1

        # Update the progress
        if pbar is not None: pbar.animate( idx )

    # Sanity Check
    if num_spectrograms == 0:
        raise Warning( "No spectrograms generated -- Did you forget to specify Length?" )

    # Average the plot and return the calculated parameters
    avg_spec = avg_spec / float( num_spectrograms )

    return avg_spec, f, t