def load_mTRF_audio(datadir, regressor='envelope', ntrl=15, stopband=((0, .5), (15, -1)), ofs=60, nvirt_out=30, verb=1):
    d = loadmat(datadir)
    X = d['EEG']  # (nSamp, d)
    Y = d[regressor]  # (nSamp, e)
    Y = Y[:, np.newaxis, :]  # (nSamp, nY, e)
    fs = d['Fs'][0][0]
    if ofs is None:
        ofs = fs

    # preprocess -> spectral filter, in continuous time!
    if stopband is not None:
        if verb > 0:
            print("preFilter: {}Hz".format(stopband))
        X, _, _ = butter_sosfilt(X, stopband, fs, axis=-2)

    # generate artificial other stimulus streams, for testing
    Y_test = block_randomize(Y, nvirt_out, axis=-3,
                             block_size=Y.shape[0] // ntrl // 2)
    Y = np.concatenate((Y, Y_test), -2)  # (nSamp, nY, e)

    # slice X,Y into 'trials'
    if ntrl > 1:
        winsz = X.shape[0] // ntrl
        X = window_axis(X, axis=0, winsz=winsz, step=winsz)  # (nTrl, nSamp, d)
        Y = window_axis(Y, axis=0, winsz=winsz, step=winsz)  # (nTrl, nSamp, nY, e)
    else:
        X = X[np.newaxis, ...]
        Y = Y[np.newaxis, ...]

    # preprocess -> downsample
    resamprate = int(fs / ofs)
    if resamprate > 1:
        if verb > 0:
            print("resample: {}->{}hz rsrate={}".format(fs, fs / resamprate, resamprate))
        X = X[:, ::resamprate, :]  # decimate X (trl, samp, d)
        Y = Y[:, ::resamprate, :]  # decimate Y (trl, samp, y)
        fs = fs / resamprate

    # make meta-info
    coords = [None] * X.ndim
    coords[0] = {'name': 'trial'}
    coords[1] = {'name': 'time', 'fs': fs, 'units': 'ms',
                 'coords': np.arange(X.shape[1]) * 1000 / fs}
    coords[2] = {'name': 'channel', 'coords': None}
    return (X, Y, coords)
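
# Usage sketch (illustrative only): the .mat path below is hypothetical; any file with
# 'EEG', 'envelope' and 'Fs' variables in the mTRF layout should work.  The helper name
# _demo_load_mTRF_audio is not part of the loader API.
def _demo_load_mTRF_audio(matfile='~/data/mTRF/speech_data.mat'):
    import os
    X, Y, coords = load_mTRF_audio(os.path.expanduser(matfile))
    # X: (nTrl, nSamp, d) EEG, Y: (nTrl, nSamp, nY, e) envelopes with the true stream
    # first, coords: per-dimension meta-info (time axis in ms, fs, channel names)
    print("X={} Y={} fs={}".format(X.shape, Y.shape, coords[1]['fs']))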
def load_brainstream(datadir, sessdir=None, sessfn=None, ofs=60, ifs=None, fr=None, passband=None, stopband=((0, 5), (25, -1)), verb=0):
    # load the data file
    Xfn = datadir
    if sessdir:
        Xfn = os.path.join(Xfn, sessdir)
    if sessfn:
        Xfn = os.path.join(Xfn, sessfn)
    sessdir = os.path.dirname(Xfn)
    data = loadmat(Xfn)
    if 'v' in data.keys():  # silly mat struct stuff..
        data = data['v']  # [d x samp x trl]
    #print("data.keys={}".format(data.keys()))

    ch_names = None
    if 'X' in data:  # all in 1 file, raw plos_one format
        X = data['X']  # (d x samp x trl)
        cb = data['codes'] if 'codes' in data else data['V']  # (samp_code x nY)
        lab = data['y']  # (trl,)
    elif 'data' in data:  # pre-converted data
        X = data['data']  # [d x samp_seq x nTrl]
        cb = data['codebooks']  # [samp_code x nY]
        lab = data['labels']  # [nTrl x 1] - index into codes array
    else:  # extract info from other files...
        X = data
        trainmode = 'train' in os.path.basename(Xfn)
        # general config = codebook info
        cfgfn = os.path.join(datadir, sessdir, 'cfg.mat')
        try:
            cfg = loadmat(cfgfn)
            stim = cfg['stimulation']
            fr = stim['rate'] * stim['uprate']
            if trainmode:
                subset = stim['trainsubset']
                layout = stim['trainlayout']
                cb = stim['U']  # [samp_code x nY]
            else:
                subset = stim['testsubset']
                layout = stim['testlayout']
                cb = stim['V']  # [samp_code x nY]
            cb = cb[:, layout]  # [samp_code x nY]
        except Exception:
            raise ValueError('Couldnt load the configuration file!')
        # extract the header to get the sample rate
        hdrfn = os.path.join(datadir, sessdir, 'hdr.mat')
        try:
            hdr = loadmat(hdrfn)
            if 'v' in hdr.keys():
                hdr = hdr['v']
            ifs = hdr['Fs'][0]
        except Exception:
            print('Warning: Couldnt load the header file')
        # load per-trial labels
        if trainmode:
            labfn = os.path.join(datadir, sessdir, 'train' + 'labels.mat')
        else:
            labfn = os.path.join(datadir, sessdir, 'test' + 'labels.mat')
        lab = loadmat(labfn)
        if 'v' in lab.keys():
            lab = lab['v']  # [trl]

    # get the sample rate, and downsample rate
    if ifs is None:
        # get the sample rate info
        if 'fs' in data:
            ifs = data['fs'][0][0]
        elif 'fSample' in data:
            ifs = data['fSample'][0][0]
        else:
            ifs = 180
    if fr is None:
        if 'fr' in data:
            fr = data['fr'][0][0]
        else:
            fr = 60
    fs = ifs
    if ofs is None:
        ofs = ifs

    X = X.astype("float32")  # [ch x samp x trl]:float - raw eeg
    X = np.moveaxis(X, (0, 1, 2), (2, 1, 0))  # (nTrl, nSamp, d)
    X = np.ascontiguousarray(X)  # ensure memory efficient layout
    lab = lab.astype("uint8").ravel()  # [trl]:int - index into codebook
    cb = cb.astype("bool")  # [samp x nY]:bool -- codebook

    # convert lab+code into Y + samp-times, where 1st row is always the true label
    cb = cb[np.mod(np.arange(0, X.shape[1]), cb.shape[0]), :]  # loop cb up to X size [samp x nY]
    Y = np.zeros((len(lab), X.shape[1], cb.shape[1] + 1), dtype=bool)  # (nTrl, nSamp, nY+1)
    for ti, l in enumerate(lab):
        Y[ti, :, 0] = cb[:, l - 1]  # copy in true label
        Y[ti, :, 1:] = cb  # copy in other outputs

    # preprocess -> spectral filter
    if stopband is not None or passband is not None:
        # BODGE: pre-center X
        X = X - X[..., 0:1, :]
        if verb > 0:
            print("preFilter: {}Hz".format(stopband))
        X, _, _ = butter_sosfilt(X, stopband, fs, passband=passband)

    # preprocess -> downsample
    resamprate = round(2 * fs / ofs) / 2  # round to nearest .5
    if resamprate > 1:
        print("resample by {}: {}->{}Hz".format(resamprate, fs, fs / resamprate))
        idx = np.arange(0, X.shape[1], resamprate).astype(int)
        X = X[:, idx, :]  # decimate X (trl, samp, d)
        Y = Y[:, idx, :]  # decimate Y (trl, samp, y)
        fs = fs / resamprate

    # make coords array for the meta-info about the dimensions of X
    coords = [None] * X.ndim
    coords[0] = {'name': 'trial'}
    coords[1] = {'name': 'time', 'unit': 'ms', 'fs': fs,
                 'coords': np.arange(X.shape[1]) * 1000 / fs}
    coords[2] = {'name': 'channel', 'coords': ch_names}
    # return data + metadata
    return (X, Y, coords)
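
# Minimal sketch (toy sizes, not taken from any dataset) of how load_brainstream builds
# the stimulus matrix Y above: the codebook is tiled up to the data length, then for each
# trial the true label's code goes in output 0 and all candidate codes follow after it.
# The helper name _demo_codebook_to_Y is purely illustrative.
def _demo_codebook_to_Y(nSamp=12, nTrl=3, nY=4, code_len=6):
    import numpy as np
    rng = np.random.default_rng(0)
    cb = rng.integers(0, 2, size=(code_len, nY)).astype(bool)  # (samp_code, nY)
    lab = rng.integers(1, nY + 1, size=nTrl)                   # 1-based true labels
    cb = cb[np.mod(np.arange(nSamp), cb.shape[0]), :]          # loop codebook up to nSamp
    Y = np.zeros((nTrl, nSamp, nY + 1), dtype=bool)            # (nTrl, nSamp, nY+1)
    for ti, l in enumerate(lab):
        Y[ti, :, 0] = cb[:, l - 1]  # true label's code always in output 0
        Y[ti, :, 1:] = cb           # all candidate codes after it
    return Y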
ch_names = [ch_names[i] for i in range(len(ch_names)) if keep[i]]

# ch-subset
gigasubset = ('C5', 'C3', 'C1', 'Cz', 'C2', 'C4', 'C6',
              'FC5', 'FC3', 'FC1', 'FCz', 'FC2', 'FC4', 'FC6',
              'CP5', 'CP3', 'CP1', 'CPz', 'CP2', 'CP4', 'CP6')
minsubset = ('C3', 'C4')
chsubset = minsubset
keep = [c in chsubset for c in ch_names]
X = X[..., keep]
ch_names = [ch_names[i] for i in range(len(ch_names)) if keep[i]]

plt.figure(100)
plot_erp(X, lab, 'car')

# hp-lp
X, _, _ = butter_sosfilt(X, stopband=((0, 8), (16, -1)), fs=fs)
plt.figure(101)
plot_erp(X, lab, 'hp-lp', plotp=True)

# whiten
Cxx = updateCxx(None, X, None)
W, _ = robust_whitener(Cxx)
X = np.einsum("Tsd,dw->Tsw", X, W)
#Cxxw = updateCxx(None, X, None)
plt.figure(102)
plot_erp(X, lab, 'wht')

# welch
freqs, X = welch(X, fs,
def load_twofinger(datadir, sessdir=None, sessfn=None, ofs=60, stopband=((0, 1), (25, -1)), subtriallen=10, nvirt=20, verb=0, ch_idx=slice(32)):
    # load the data file
    Xfn = datadir
    if sessdir:
        Xfn = os.path.join(Xfn, sessdir)
    if sessfn:
        Xfn = os.path.join(Xfn, sessfn)
    sessdir = os.path.dirname(Xfn)
    data = loadmat(Xfn)

    def squeeze(v):
        while v.size == 1 and v.ndim > 0:
            v = v[0]
        return v

    fs = 512
    ch_names = [c[0] for c in squeeze(data['chann']).ravel()]
    X = squeeze(data['X'])  # (ch, samp)
    X = np.moveaxis(X, (0, 1), (1, 0))  # (samp, ch)
    X = X.astype(np.float32)
    X = np.ascontiguousarray(X)
    if ch_idx is not None:
        X = X[:, ch_idx]
        ch_names = ch_names[ch_idx]
    if verb > 0:
        print("X={}".format(X.shape), flush=True)
    lab = squeeze(data['Y']).astype(int).ravel()  # (samp,)

    # preprocess -> spectral filter, in continuous time!
    if stopband is not None:
        if verb > 0:
            print("preFilter: {}Hz".format(stopband))
        X, _, _ = butter_sosfilt(X, stopband, fs)

    # make the targets, for the events we care about
    Y, lab2class = lab2ind(lab, marker2stim.values())  # (nSamp, e) - feature dim per class
    if verb > 0:
        print("Y={}".format(Y.shape))

    # preprocess -> downsample
    resamprate = int(fs / ofs)
    if resamprate > 1:
        if verb > 0:
            print("resample: {}->{}hz rsrate={}".format(fs, fs / resamprate, resamprate))
        X = X[..., ::resamprate, :]  # decimate X (samp, d)
        # re-sample Y, being sure to keep any events in the re-sample window
        Y = window_axis(Y, winsz=resamprate, step=resamprate, axis=-2)  # (samp, win, e)
        Y = np.max(Y, axis=-2)  # (samp, e)  N.B. use max so we don't lose single-sample events
        fs = fs / resamprate
    if verb > 0:
        print("X={}".format(X.shape))
        print("Y={}".format(Y.shape))

    # make virtual targets
    Y = Y[:, np.newaxis, :]  # (nsamp, 1, e)
    Y_virt = block_randomize(Y, nvirt, axis=-3)  # (nsamp, nvirt, e)
    Y = np.concatenate((Y, Y_virt), axis=-2)  # (nsamp, 1+nvirt, e)
    if verb > 0:
        print("Y={}".format(Y.shape))

    # cut into sub-trials
    nsubtrials = X.shape[0] / fs / subtriallen
    if nsubtrials > 1:
        winsz = int(X.shape[0] // nsubtrials)
        if verb > 0:
            print('subtrial winsz={}'.format(winsz))
        # slice into sub-trials
        X = window_axis(X, axis=0, winsz=winsz, step=winsz)  # (trl, win, d)
        Y = window_axis(Y, axis=0, winsz=winsz, step=winsz)  # (trl, win, nY)
        if verb > 0:
            print("X={}".format(X.shape))
            print("Y={}".format(Y.shape))

    # make coords array for the meta-info about the dimensions of X
    coords = [None] * X.ndim
    coords[0] = {'name': 'trial'}
    coords[1] = {'name': 'time', 'unit': 'ms', 'fs': fs,
                 'coords': np.arange(X.shape[1]) * 1000 / fs}
    coords[2] = {'name': 'channel', 'coords': ch_names}
    # return data + metadata
    return (X, Y, coords)
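
# Minimal numpy-only sketch of the event-preserving downsampling used above: plain
# decimation (Y[::resamprate]) can drop single-sample events, so the event stream is
# max-pooled over each resampling window instead.  window_axis with step==winsz is
# emulated here with a reshape, assuming the length is a multiple of resamprate.
def _demo_event_preserving_downsample(resamprate=4):
    import numpy as np
    Y = np.zeros((16, 2), dtype=bool)
    Y[5, 0] = True  # a single-sample event
    Yd = Y.reshape(-1, resamprate, Y.shape[-1]).max(axis=1)  # (nSamp//resamprate, e)
    assert Yd[:, 0].any()  # the event survives the downsample
    return Yd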
def load_brainsonfire(datadir, sessdir=None, sessfn=None, ofs=60, stopband=((0, 1), (25, -1)), subtriallen=10, nvirt=20, chIdx=slice(64), verb=2):
    # load the data file
    Xfn = datadir
    if sessdir:
        Xfn = os.path.join(Xfn, sessdir)
    if sessfn:
        Xfn = os.path.join(Xfn, sessfn)
    sessdir = os.path.dirname(Xfn)

    if verb > 1:
        print("Loading header")
    hdr = read_buffer_offline_header(Xfn)
    if verb > 1:
        print("Loading data")
    X = read_buffer_offline_data(Xfn, hdr)  # (nsamp, nch)
    if verb > 1:
        print("Loading events")
    evts = read_buffer_offline_events(Xfn)

    fs = hdr.fs
    ch_names = hdr.labels
    if chIdx is not None:
        X = X[..., chIdx]
        ch_names = ch_names[chIdx] if ch_names is not None else None

    # pre-resample to save memory
    rsrate = int(fs // 120)
    if rsrate > 1:
        if verb > 0:
            print("Pre-re-sample by {}: {}->{}Hz".format(rsrate, fs, fs / rsrate))
        X = X[::rsrate, :]
        for e in evts:
            e.sample = e.sample / rsrate
        fs = fs / rsrate
    if verb > 0:
        print("X={} @{}Hz".format(X.shape, fs), flush=True)

    # extract the trigger info
    trigevts = [e for e in evts if e.type.lower() == trigger_event]
    trig_samp = np.array([e.sample for e in trigevts], dtype=int)
    trig_val = [e.value for e in trigevts]
    trig_ind, lab2class = lab2ind(trig_val)  # convert to indicator (ntrig, ncls)

    # up-sample to stim rate
    Y = np.zeros((X.shape[0], trig_ind.shape[-1]), dtype=bool)
    Y[trig_samp, :] = trig_ind
    if verb > 0:
        print("Y={}".format(Y.shape))

    # BODGE: trim to useful data range
    if .1 < (trig_samp[0] - fs) / X.shape[0] or (trig_samp[-1] + fs) / X.shape[0] < .9:
        if verb > 0:
            print('Trimming range: {}-{}s'.format(trig_samp[0] / fs, trig_samp[-1] / fs))
        # limit to the useful data range
        rng = slice(int(trig_samp[0] - fs), int(trig_samp[-1] + fs))
        X = X[rng, :]
        Y = Y[rng, ...]
        if verb > 0:
            print("X={}".format(X.shape))
            print("Y={}".format(Y.shape))

    # preprocess -> spectral filter, in continuous time!
    if stopband is not None:
        if verb > 0:
            print("preFilter: {}Hz".format(stopband))
        X, _, _ = butter_sosfilt(X, stopband, fs)

    # preprocess -> downsample
    resamprate = int(fs / ofs)
    if resamprate > 1:
        if verb > 0:
            print("resample by {}: {}->{}Hz".format(resamprate, fs, fs / resamprate))
        X = X[..., ::resamprate, :]  # decimate X (samp, d)
        # re-sample Y, being sure to keep any events in the re-sample window
        Y = window_axis(Y, winsz=resamprate, step=resamprate, axis=-2)  # (samp, win, e)
        Y = np.max(Y, axis=-2)  # (samp, e)  N.B. use max so we don't lose single-sample events
        fs = fs / resamprate

    # make virtual targets
    Y = Y[:, np.newaxis, :]  # (nsamp, 1, e)
    Y_virt = block_randomize(Y, nvirt, axis=-3)  # (nsamp, nvirt, e)
    Y = np.concatenate((Y, Y_virt), axis=-2)  # (nsamp, 1+nvirt, e)
    if verb > 0:
        print("Y={}".format(Y.shape))

    # cut into sub-trials
    nsubtrials = X.shape[0] / fs / subtriallen
    if nsubtrials > 1:
        winsz = int(X.shape[0] // nsubtrials)
        if verb > 0:
            print('subtrial winsz={}'.format(winsz))
        # slice into sub-trials
        X = window_axis(X, axis=0, winsz=winsz, step=winsz)  # (trl, win, d)
        Y = window_axis(Y, axis=0, winsz=winsz, step=winsz)  # (trl, win, nY)
        if verb > 0:
            print("X={}".format(X.shape))
            print("Y={}".format(Y.shape))

    # make coords array for the meta-info about the dimensions of X
    coords = [None] * X.ndim
    coords[0] = {'name': 'trial'}
    coords[1] = {'name': 'time', 'unit': 'ms', 'fs': fs,
                 'coords': np.arange(X.shape[1]) * 1000 / fs}
    coords[2] = {'name': 'channel', 'coords': ch_names}
    # return data + metadata
    return (X, Y, coords)
def load_ninapro_db2(datadir, stopband=((0, 15), (45, 65), (95, 125), (250, -1)), envelopeband=(10, -1), trlen_ms=None, ofs=60, nvirt=20, rectify=True, whiten=True, log=True, plot=False, filterbank=None, zscore_y=True, verb=1):
    d = loadmat(datadir, variable_names=('emg', 'glove', 'stimulus'))
    X = d['emg']  # (nSamp, d)
    Y = d['glove']  # (nSamp, e)
    lab = d['stimulus'].ravel()  # (nSamp,) - use to slice out trials+labels
    fs = 2000
    if ofs is None:
        ofs = fs

    # get trial start/end info
    trl_start = np.flatnonzero(np.diff(lab) > 0)
    lab = lab[trl_start + 1]
    print('trl_start={}'.format(trl_start))
    print('label={}'.format(lab))
    print("diff(trl_start)={}".format(np.diff(trl_start)))
    if trlen_ms is None:
        trlen_ms = np.max(np.diff(trl_start)) * 1000 / fs
    print('trlen_ms={}'.format(trlen_ms))

    if stopband is not None:
        if verb > 0:
            print("preFilter: {}Hz".format(stopband))
        X, _, _ = butter_sosfilt(X, stopband, fs)
    if plot:
        plt.figure(101)
        plt.plot(X)
        plt.title("hp+notch+lp")

    # preprocess -> spatial whiten
    # TODO[] : make this fit->transform method
    if whiten:
        if verb > 0:
            print("spatial whitener")
        Cxx = updateCxx(None, X, None)
        W, _ = robust_whitener(Cxx)
        X = np.einsum("sd,dw->sw", X, W)
        if plot:
            plt.figure(102)
            plt.plot(X)
            plt.title("+whiten")

    if filterbank is not None:
        if verb > 0:
            print("Filterbank: {}".format(filterbank))
        # apply filter bank to frequency ranges into virtual channels
        Xs = []
        # TODO: make a nicer shape, e.g. (tr, samp, band, ch)
        for bi, band in enumerate(filterbank):
            Xf, _, _ = butter_sosfilt(X, band, fs)
            Xs.append(Xf)
        # stack the bands as virtual channels
        X = np.concatenate(Xs, -1)

    X = np.abs(X)  # rectify
    if log:
        if verb > 0:
            print("log amplitude")
        X = np.log(np.maximum(X, 1e-6))
    if plot:
        plt.figure(103)
        plt.plot(X)
        plt.title("+abs")

    if envelopeband is not None:
        if verb > 0:
            print("Envelope band={}".format(envelopeband))
        X, _, _ = butter_sosfilt(X, envelopeband, fs)  # low-pass = envelope extraction
        if plot:
            plt.figure(104)
            plt.plot(X)
            plt.title("env")

    # preprocess -> downsample
    resamprate = int(fs / ofs)
    if resamprate > 1:
        if verb > 0:
            print("resample: {}->{}hz rsrate={}".format(fs, fs / resamprate, resamprate))
        X = X[..., ::resamprate, :]  # decimate X (samp, d)
        Y = Y[..., ::resamprate, :]  # decimate Y (samp, e)
        trl_start = trl_start / resamprate
        fs = fs / resamprate

    # pre-process : z-trans Y
    if zscore_y:
        if verb > 0:
            print("Z-trans Y")
        mu = np.mean(Y, axis=-2, keepdims=True)
        std = np.std(Y, axis=-2, keepdims=True)
        std[std < 1e-6] = 1  # guard divide by 0
        Y = (Y - mu) / std

    # generate artificial other stimulus streams, for testing
    # TODO: randomize in better way
    Y = Y[:, np.newaxis, :]  # (nSamp, nY, e)
    Y_test = block_randomize(Y, nvirt, axis=-3, block_size=Y.shape[0] // 100 // 2)
    Y = np.concatenate((Y, Y_test), -2)  # (nSamp, nY, e)

    # slice X,Y into trials
    oX = X  # (nSamp, d)
    oY = Y  # (nSamp, nY, e)
    trlen_samp = int(trlen_ms * fs / 1000)
    X = np.zeros((trl_start.size, trlen_samp, X.shape[-1]))
    Y = np.zeros((trl_start.size, trlen_samp) + Y.shape[-2:])
    print("Slicing {} trials of {}ms".format(len(trl_start), trlen_ms))
    for ti, tii in enumerate(trl_start):
        tii = int(tii)
        trl_len = min(oX.shape[0], tii + trlen_samp) - tii
        X[ti, :trl_len, ...] = oX[tii:tii + trl_len, ...]
        Y[ti, :trl_len, ...] = oY[tii:tii + trl_len, ...]

    # make meta-info
    coords = [None] * X.ndim
    coords[0] = {'name': 'trial', 'coords': lab}
    coords[1] = {'name': 'time', 'fs': fs, 'units': 'ms',
                 'coords': np.arange(X.shape[1]) * 1000 / fs}
    coords[2] = {'name': 'channel', 'coords': None}
    return (X, Y, coords)
def extract_envelope(X, fs, stopband=None, whiten=True, filterbank=None, log=True, env_stopband=(10, -1), verb=False, plot=False):
    """extract the envelope from the input data

    Args:
        X ([type]): [description]
        fs ([type]): [description]
        stopband ([type], optional): pre-filter stop band. Defaults to None.
        whiten (bool, optional): flag if we spatially whiten before envelope extraction. Defaults to True.
        filterbank ([type], optional): set of filters to apply to extract the envelope for each filter output. Defaults to None.
        log (bool, optional): flag if we return raw power or log-power. Defaults to True.
        env_stopband (tuple, optional): post-filter on the extracted envelopes. Defaults to (10,-1).
        verb (bool, optional): verbosity level. Defaults to False.
        plot (bool, optional): flag if we plot the result of each preprocessing step. Defaults to False.

    Returns:
        X: the extracted envelopes
    """
    from multipleCCA import robust_whitener
    from updateSummaryStatistics import updateCxx
    from utils import butter_sosfilt

    if plot:
        import matplotlib.pyplot as plt
        plt.figure(100)
        plt.clf()
        plt.plot(X[:int(fs * 10), :].copy())
        plt.title("raw")

    if stopband is not None:
        if verb > 0:
            print("preFilter: {}Hz".format(stopband))
        X, _, _ = butter_sosfilt(X, stopband, fs)
        if plot:
            plt.figure(101)
            plt.clf()
            plt.plot(X[:int(fs * 10), :].copy())
            plt.title("hp+notch+lp")

    # preprocess -> spatial whiten
    # TODO[] : make this fit->transform method
    if whiten:
        if verb > 0:
            print("spatial whitener")
        Cxx = updateCxx(None, X, None)
        W, _ = robust_whitener(Cxx)
        X = np.einsum("sd,dw->sw", X, W)
        if plot:
            plt.figure(102)
            plt.clf()
            plt.plot(X[:int(fs * 10), :].copy())
            plt.title("+whiten")

    if filterbank is not None:
        if verb > 0:
            print("Filterbank: {}".format(filterbank))
        if plot:
            plt.figure(103)
            plt.clf()
        # apply filter bank to frequency ranges into virtual channels
        Xs = []
        # TODO: make a nicer shape, e.g. (tr, samp, band, ch)
        # TODO[]: check doesn't modify in place
        for bi, band in enumerate(filterbank):
            Xf, _, _ = butter_sosfilt(X.copy(), band, fs)
            Xs.append(Xf)
            if plot:
                plt.subplot(len(filterbank), 1, bi + 1)
                plt.plot(Xf[:int(fs * 10), :])
                plt.title("+filterbank {}".format(band))
        # stack the bands as virtual channels
        X = np.concatenate(Xs, -1)

    X = np.abs(X)  # rectify
    if plot:
        plt.figure(104)
        plt.plot(X[:int(fs * 10), :])
        plt.title("+abs")

    if log:
        if verb > 0:
            print("log amplitude")
        X = np.log(np.maximum(X, 1e-6))
        if plot:
            plt.figure(105)
            plt.clf()
            plt.plot(X[:int(fs * 10), :])
            plt.title("+log")

    if env_stopband is not None:
        if verb > 0:
            print("Envelope band={}".format(env_stopband))
        X, _, _ = butter_sosfilt(X, env_stopband, fs)  # low-pass = envelope extraction
        if plot:
            plt.figure(104)
            plt.clf()
            plt.plot(X[:int(fs * 10), :])
            plt.title("+env")
    return X
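
# Usage sketch for extract_envelope on synthetic data (illustrative only): two channels of
# noise, a notch+low-pass pre-filter and a two-band filterbank.  The band edges below are
# arbitrary demo values, not prescribed by any dataset; filterbank entries are stop-band
# pairs, as in the MI branch of load_openBMI.
def _demo_extract_envelope(fs=250):
    import numpy as np
    X = np.random.randn(fs * 10, 2).astype(np.float32)  # 10s of 2-channel noise
    env = extract_envelope(X, fs,
                           stopband=((45, 55), (100, -1)),  # notch + low-pass pre-filter
                           whiten=True,
                           filterbank=(((0, 8), (16, -1)), ((0, 18), (30, -1))),
                           log=True,
                           env_stopband=(10, -1))
    print("env={}".format(env.shape))  # (nSamp, nch * nbands)
    return env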
def load_openBMI(datadir, sessdir=None, sessfn=None, ofs=60, stopband=((0, 1), (25, -1)), CAR=False, verb=1, trlen_ms=None, offset_ms=(0, 0), ppMI=True):
    if offset_ms is None:
        offset_ms = (0, 0)

    # load the data file
    Xfn = datadir
    if sessdir:
        Xfn = os.path.join(Xfn, sessdir)
    if sessfn:
        Xfn = os.path.join(Xfn, sessfn)
    sessdir = os.path.dirname(Xfn)
    fn = os.path.basename(Xfn)
    print("Loading: {}".format(Xfn))
    data = loadmat(Xfn)
    if 'EEG_SSVEP_train' in data:  # TODO[] : merge training and test
        data = data['EEG_SSVEP_train']
    elif 'EEG_ERP_train' in data:
        data = data['EEG_ERP_train']
    elif 'EEG_MI_train' in data:
        data = data['EEG_MI_train']

    def squeeze(v):
        while v.size == 1 and v.ndim > 0:
            v = v[0]
        return v

    fs = squeeze(data['fs'])
    ch_names = [d for d in data['chan'][0, :]]
    X = squeeze(data['x'])  # (nSamp, d)
    X = np.asarray(X, order='C', dtype='float32')
    print("X={}".format(X.shape), flush=True)
    trl_idx = np.asarray(squeeze(data['t']).ravel(), dtype=int)  # (nTrl,)
    lab = np.asarray(squeeze(data['y_dec']).ravel(), dtype=int)  # (nTrl,) label for each trial

    if fs > 250:  # pre-resample....
        rr = int(fs // 250)
        print("pre-downsample : {} -> {}".format(fs, fs / rr))
        X = X[::rr, ...]
        trl_idx = trl_idx // rr
        fs = fs / rr

    if 'ERP' in fn:
        ep_idx, ep_trl_idx = get_trl_ep_idx(trl_idx, fs * 2)
        trl_idx = ep_idx[:, 0]
        # make lab the right shape also
        lab0 = lab
        lab = np.zeros(ep_idx.shape, dtype=int)
        for ei, ti_idx in enumerate(ep_trl_idx):
            lab[ei, :len(ti_idx)] = lab0[ti_idx]
        # auto-determine the right trial length
        if trlen_ms is None:
            trlen_samp = np.max(ep_idx[:, -1] - ep_idx[:, 0])
            trlen_ms = trlen_samp * 1000 / fs
    else:
        ep_idx = None
        if trlen_ms is None:
            if 'SSVEP' in Xfn:
                trlen_ms = SSVEP_STIM_DUR * 1000
            elif 'MI' in Xfn:
                trlen_ms = MI_STIM_DUR * 1000

    # delete the data variable to free the ram
    del data

    # preprocess -> spectral filter, in continuous time!
    if stopband is not None:
        if verb > 0:
            print("preFilter: {}Hz".format(stopband))
        X, _, _ = butter_sosfilt(X, stopband, fs)

    if CAR:
        if verb > 0:
            print("CAR")
        X = X - np.mean(X, -1, keepdims=True)

    # pre-process -> band-power....
    # N.B. *before* slicing to avoid startup artifacts.
    if "MI" in fn and ppMI:
        # map X to (log)power in MI-relevant frequency bands,
        # N.B. filter-bands are given as **STOPBANDS**
        filterbank = (((0, 8), (16, -1)), ((0, 18), (30, -1)))  # relevant frequency bands, mu~=10-14, beta~=20-25
        env_stopband = (1, -1)  # N.B. should be < min(filter-freq)/2 to smooth the rectified signal
        print("map to envelope in bands {}".format(filterbank))
        X = extract_envelope(X, fs, stopband=None, whiten=True, filterbank=filterbank, log=False, env_stopband=env_stopband)
        # update the channel labels to include band info
        ch_names = ["{} @{}hz".format(c[0], f[0]) for f in filterbank for c in ch_names]
        if verb > 1:
            print("ch_names={}".format(ch_names))

    # slice X
    trlen_samp = int(trlen_ms * fs / 1000)
    offset_samp = [int(o * fs / 1000) for o in offset_ms]  # relative to start/end (0, trlen_samp)
    bgnend_samp = (offset_samp[0], trlen_samp + offset_samp[1])  # start/end slice window
    xlen_samp = bgnend_samp[1] - bgnend_samp[0]
    print("xslice_samp =[{} - {}] @ {}Hz".format(bgnend_samp[0], bgnend_samp[1], fs))
    Xraw = X
    X = np.zeros((len(trl_idx), xlen_samp, Xraw.shape[-1]), dtype='float32')  # (nTrl, nSamp, d)
    for ti, si in enumerate(trl_idx):
        X[ti, :, :] = Xraw[si + bgnend_samp[0]:si + bgnend_samp[1], :]
    del Xraw

    # extract the reference signals
    if 'SSVEP' in fn:
        # convert lab+code into Y + samp-times, where 1st row is always the true label
        if offset_samp[0] > 0 or offset_samp[1] < 0:
            # otherwise more complex to work out how to insert
            raise ValueError("Only offset_ms which pads the slice is currently supported")
        else:
            cb_idx = slice(-offset_samp[0], -offset_samp[1] if offset_samp[1] > 0 else None)
        # make ssvep codebook
        cb = make_ssvep_ref_signals(trlen_samp, freqs=SSVEP_FREQS, fs=fs, phases=None)  # (nsamp, nY)
        Y = np.zeros((len(lab), X.shape[1], cb.shape[1] + 1), dtype='float32')  # (nTrl, nSamp, nY+1)
        for ti, l in enumerate(lab):
            Y[ti, cb_idx, 0] = cb[:, l - 1]  # copy in true label
            Y[ti, cb_idx, 1:] = cb  # copy in other outputs

    elif 'ERP' in fn:
        # permute to make other possible entries
        cb_true = lab == 2  # target=flash (nTrl, nEp)
        cb_true = cb_true[:, :, np.newaxis, np.newaxis]  # (nTrl, nEp, nY, e)
        # make 35 virtual targets
        cb = block_randomize(cb_true, 35, axis=-3)
        cb = np.concatenate((cb_true, cb), axis=-2)  # [..., nY+1, e]
        # make into a sample-rate label set
        Y = upsample_codebook(xlen_samp, cb, ep_idx, fs * ERP_STIM_DUR, offset_samp)
        # BODGE: strip the feature dim again..
        Y = Y[..., 0]

    elif "MI" in fn:
        # make the target array
        # permute to make other possible entries
        cb_true, lab2class = lab2ind(lab)  # (nTrl, e) - feature dim per class
        cb_true = cb_true[:, np.newaxis, :]  # (nTrl, 1, e)
        cb_all = np.eye(cb_true.shape[-1])  # (nvirt, e)
        cb_all = np.tile(cb_all, (cb_true.shape[0], 1, 1))  # (nTrl, nvirt, e)
        cb = np.append(cb_true, cb_all, axis=-2)  # (nTrl, nvirt+1, e)
        cb = cb[:, np.newaxis, :, :]  # (nTrl, 1, nvirt+1, e)
        # make into a sample-rate label set
        Y = upsample_codebook(xlen_samp, cb, None, fs * MI_STIM_DUR, offset_samp)  # (nTrl, nSamp, nY, e)

    # preprocess -> downsample
    resamprate = int(fs / ofs)
    if resamprate > 1:
        if verb > 0:
            print("resample: {}->{}hz rsrate={}".format(fs, fs / resamprate, resamprate))
        X = X[:, ::resamprate, :]  # decimate X (trl, samp, d)
        Y = Y[:, ::resamprate, :]  # decimate Y (trl, samp, y)
        fs = fs / resamprate

    stimTimes = None  # non-sliced output

    # make coords array for the meta-info about the dimensions of X
    coords = [None] * X.ndim
    coords[0] = {'name': 'trial', 'coords': trl_idx, 'lab': lab}
    coords[1] = {'name': 'time', 'unit': 'ms', 'fs': fs,
                 'coords': np.linspace(offset_ms[0], trlen_ms + offset_ms[1], X.shape[1])}
    coords[2] = {'name': 'channel', 'coords': ch_names}
    # return data + metadata
    return (X, Y, coords)
def load_mark_EMG(datadir, sessdir=None, sessfn=None, ofs=60, stopband=((0, 10), (45, 55), (95, 105), (145, -1)), filterbank=None, verb=0, log=True, whiten=True, plot=False):
    fs = 1000
    ch_names = None

    # load the data file
    Xfn = os.path.expanduser(datadir)
    if sessdir:
        Xfn = os.path.join(Xfn, sessdir)
    if sessfn:
        Xfn = os.path.join(Xfn, sessfn)
    sessdir = os.path.dirname(Xfn)
    print("Loading {}".format(Xfn))
    data = loadmat(Xfn)

    def squeeze(v):
        while v.size == 1 and v.ndim > 0:
            v = v[0]
        return v

    X = np.array([squeeze(d['buf']) for d in squeeze(data['data'])])  # (nTrl, nCh, nSamp)
    X = np.moveaxis(X, (0, 1, 2), (0, 2, 1))  # (nTrl, nSamp, nCh)
    X = np.ascontiguousarray(X)  # ensure memory efficient layout
    lab = np.array([squeeze(e['value']) for e in data['devents']], dtype=int)  # (nTrl,)

    import matplotlib.pyplot as plt
    if plot:
        plt.figure(100)
        plt.plot(X[0, :, :])
        plt.title("raw")

    # preprocess -> spectral filter, in continuous time!
    if stopband is not None:
        if verb > 0:
            print("preFilter: {}Hz".format(stopband))
        X, _, _ = butter_sosfilt(X, stopband, fs)
        if plot:
            plt.figure(101)
            plt.plot(X[0, :, :])
            plt.title("hp+notch+lp")

    # preprocess -> spatial whiten
    # TODO[] : make this fit->transform method
    if whiten:
        print("spatial whitener")
        Cxx = updateCxx(None, X, None)
        W, _ = robust_whitener(Cxx)
        X = np.einsum("tsd,dw->tsw", X, W)
        if plot:
            plt.figure(102)
            plt.plot(X[0, :, :])
            plt.title("+whiten")

    if filterbank is not None:
        if verb > 0:
            print("Filterbank: {}".format(filterbank))
        # apply filter bank to frequency ranges into virtual channels
        Xs = []
        # TODO: make a nicer shape, e.g. (tr, samp, band, ch)
        for bi, band in enumerate(filterbank):
            Xf, _, _ = butter_sosfilt(X, band, fs)
            Xs.append(Xf)
        # stack the bands as virtual channels
        X = np.concatenate(Xs, -1)

    X = np.abs(X)  # rectify
    if log:
        print("log amplitude")
        X = np.log(np.maximum(X, 1e-6))
    if plot:
        plt.figure(103)
        plt.plot(X[0, :, :])
        plt.title("+abs")
    X, _, _ = butter_sosfilt(X, (40, -1), fs)  # low-pass = envelope extraction
    if plot:
        plt.figure(104)
        plt.plot(X[0, :, :])
        plt.title("env")

    # preprocess -> downsample @60hz
    resamprate = int(fs / ofs)
    if resamprate > 1:
        if verb > 0:
            print("resample: {}->{}hz rsrate={}".format(fs, ofs, resamprate))
        X = X[:, ::resamprate, :]  # decimate X (trl, samp, d)
        fs = fs / resamprate

    # get Y
    Y_true, lab2class = lab2ind(lab)  # (nTrl, e)
    Y_true = Y_true[:, np.newaxis, :]  # (nTrl, 1, e)
    # TODO[] : exhaustive list of other targets...
    Yall = np.eye(Y_true.shape[-1], dtype=bool)  # (nvirt, e)
    Yall = np.tile(Yall, (Y_true.shape[0], 1, 1))  # (nTrl, nvirt, e)
    Y = np.append(Y_true, Yall, axis=-2)  # (nTrl, nvirt+1, e)
    # upsample to ofs
    Y = np.tile(Y[:, np.newaxis, :, :], (1, X.shape[1], 1, 1))  # (nTrl, nSamp, nY, e)
    Y = Y.astype(np.float32)

    # make coords array for the meta-info about the dimensions of X
    coords = [None] * X.ndim
    coords[0] = {'name': 'trial'}
    coords[1] = {'name': 'time', 'unit': 'ms', 'fs': fs,
                 'coords': np.arange(X.shape[1]) * 1000 / fs}
    coords[2] = {'name': 'channel', 'coords': ch_names}
    # return data + metadata
    return (X, Y, coords)
def load_cocktail(datadir, sessdir=None, sessfn=None, ofs=60, stopband=((0, 5), (25, -1)), verb=0, trlen_ms=None, subtriallen=10):
    # load the data file
    Xfn = os.path.expanduser(datadir)
    if sessdir:
        Xfn = os.path.join(Xfn, sessdir)
    if sessfn:
        Xfn = os.path.join(Xfn, sessfn)
    sessdir = Xfn if os.path.isdir(Xfn) else os.path.dirname(Xfn)
    stimdir = os.path.join(sessdir, '..', '..', 'Stimuli', 'Envelopes')
    runfns = glob(os.path.join(sessdir, '*.mat'))

    # extract subId (to get attended stream)
    subid = int(sessdir.split("Subject")[1].split("_")[0])
    attended_book = subids_attended_book[subid]
    # extract run id
    runid = [int(f.split("Run")[1].split(".mat")[0]) for f in runfns]
    # sort into numeric order
    sorted_id = argsort(runid)
    runfns = [(runid[i], runfns[i]) for i in sorted_id]

    # load the raw EEG data
    data = [None] * len(runid)
    stim = [None] * len(runid)
    print("Run:", end='')
    for i, (ri, rf) in enumerate(runfns):
        print("{} ".format(ri), end='', flush=True)
        data[i] = loadmat(rf)
        # load the stimulus envelopes for all books for this run
        stim[i] = [loadmat(os.path.join(stimdir, book, "{}_{}_env.mat".format(book, ri)))
                   for book in books]
    # make a label list for the trials
    lab = [books.index(attended_book)] * len(data)

    # no channel names are loaded for this dataset
    ch_names = None

    fs = squeeze(data[0]['fs'])
    if not all(squeeze(d['fs']) == fs for d in data):
        raise ValueError("Different sample rates in different runs")
    #if not all(d['fsEnv'] == fs for d in stim):
    #    raise ValueError("Different sample rates between EEG and Envelope")

    # make the X and Y arrays
    X0 = data[0]['eegData']
    Y0 = stim[0][0]['envelope']
    nSamp = min(X0.shape[0], Y0.shape[0])
    d = X0.shape[1]
    e = Y0.shape[1]
    X = np.zeros((len(runid), nSamp, d), dtype='float32')
    Y = np.zeros((len(runid), nSamp, 1 + len(stim[0]), e), dtype='float32')  # (nTrl, nSamp, nY, e)
    for ti, (d, s) in enumerate(zip(data, stim)):
        X[ti, :, :] = d['eegData'][:nSamp, :]
        Y[ti, :, 0, :] = s[lab[ti]]['envelope'][:nSamp, :]  # objID==0 is attended
        for si, ss in enumerate(s):  # all possible stimuli
            Y[ti, :, si + 1, :] = ss['envelope'][:nSamp, :]
    print("X={}".format(X.shape), flush=True)

    # preprocess -> spectral filter, in continuous time!
    if stopband is not None:
        if verb > 0:
            print("preFilter: {}Hz".format(stopband))
        X, _, _ = butter_sosfilt(X, stopband, fs)

    # preprocess -> downsample
    resamprate = int(fs / ofs)
    if resamprate > 1:
        if verb > 0:
            print("resample: {}->{}hz rsrate={}".format(fs, ofs, resamprate))
        X = X[:, ::resamprate, :]  # decimate X (trl, samp, d)
        Y = Y[:, ::resamprate, ...]  # decimate Y (trl, samp, y, e)
        fs = fs / resamprate

    # cut into sub-trials
    nsubtrials = X.shape[1] / fs / subtriallen
    if nsubtrials > 1:
        winsz = int(X.shape[1] // nsubtrials)
        print('{} subtrials -> winsz={}'.format(nsubtrials, winsz))
        # slice into sub-trials
        X = window_axis(X, axis=1, winsz=winsz, step=winsz)  # (trl, win, samp, d)
        Y = window_axis(Y, axis=1, winsz=winsz, step=winsz)  # (trl, win, samp, nY, e)
        # concatenate windows into trial dim
        X = X.reshape((X.shape[0] * X.shape[1],) + X.shape[2:])
        Y = Y.reshape((Y.shape[0] * Y.shape[1],) + Y.shape[2:])
        print("X={}".format(X.shape))

    # make coords array for the meta-info about the dimensions of X
    coords = [None] * X.ndim
    coords[0] = {'name': 'trial'}
    coords[1] = {'name': 'time', 'unit': 'ms', 'fs': fs,
                 'coords': np.arange(X.shape[1]) * 1000 / fs}
    coords[2] = {'name': 'channel', 'coords': ch_names}
    # return data + metadata
    return (X, Y, coords)
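
# Minimal numpy-only sketch of the sub-trial slicing used above (and in several other
# loaders): each long trial is cut into non-overlapping windows (window_axis with
# step == winsz reduces to a reshape in this case) and the windows are then folded into
# the trial dimension.  Toy sizes; the helper name is illustrative.
def _demo_subtrial_slicing(nTrl=2, nSamp=12, d=3, winsz=4):
    import numpy as np
    X = np.arange(nTrl * nSamp * d, dtype=float).reshape(nTrl, nSamp, d)
    nwin = nSamp // winsz
    Xw = X[:, :nwin * winsz, :].reshape(nTrl, nwin, winsz, d)  # (trl, win, samp, d)
    Xs = Xw.reshape((-1,) + Xw.shape[2:])                      # (trl*win, samp, d)
    return Xs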
def load_p300_prn(datadir, sessdir=None, sessfn=None, ofs=60, offset_ms=(-1000, 1000), ifs=None, fr=None, stopband=((0, 1), (25, -1)), order=6, subtriallen=10, verb=0, nvirt=20, chidx=slice(64)):
    # load the data file
    Xfn = datadir
    if sessdir:
        Xfn = os.path.join(Xfn, sessdir)
    if sessfn:
        Xfn = os.path.join(Xfn, sessfn)
    sessdir = os.path.dirname(Xfn)
    try:
        data = loadmat(Xfn)
    except NotImplementedError:
        # TODO[] : make this work correctly -- HDF5 field access is different to loadmat's
        #import h5py
        #data = h5py.File(Xfn, 'r')
        raise

    X = data['X']
    X = X.astype("float32")  # [ch x samp x trl]:float - raw eeg
    X = np.moveaxis(X, (0, 1, 2), (2, 1, 0))  # (nTrl, nSamp, d)
    X = np.ascontiguousarray(X)  # ensure memory efficient layout
    ch_names = np.stack(data['di'][0]['vals'][0][0]).ravel()

    # Extract the sample rate.  Argh, why so deeply nested?
    if ifs is None:
        fs = data['di'][1]['info'][0]['fs'][0, 0][0, 0]
    else:
        fs = ifs

    extrainfo = data['di'][2]['extra'][0]
    try:
        Ye = np.stack(extrainfo['flipgrid'][0], -1)  # (nY, nEp, nTrl)
    except Exception:
        Ye = None
    Ye0 = np.stack(extrainfo['flash'][0], -1)  # true-target (1, nEp, nTrl)
    tgtLetter = extrainfo['target']  # target letter, not needed
    samptimes = data['di'][1]['vals'][0].ravel()  # (nSamp,)
    flashi_ms = np.stack(extrainfo['flashi_ms'][0], -1)  # (1, nEp, nTrl)

    # convert flashi_ms to flashi_samp and upsample Ye to the sample rate
    Ye0 = np.moveaxis(Ye0, (0, 1, 2), (2, 1, 0))  # (nTrl, nEp, 1)
    stimTimes_ms = np.moveaxis(flashi_ms, (0, 1, 2), (2, 1, 0))  # (nTrl, nEp, 1)
    if Ye is not None:
        Ye = np.moveaxis(Ye, (0, 1, 2), (2, 1, 0))  # (nTrl, nEp, nY)
    else:
        # make a pseudo-set of alternative targets
        Ye = block_randomize(Ye0[..., np.newaxis], nvirt, -3)  # (nTrl, nEp, nvirt, 1)
        Ye = Ye[..., 0]  # (nTrl, nEp, nvirt)
        print("{} virt targets".format(Ye.shape[-1]))

    # upsample to sample rate
    stimTimes_samp = np.zeros(stimTimes_ms.shape, dtype=int)  # index from trial start for each flash
    Y = np.zeros(X.shape[:-1] + (Ye.shape[-1] + Ye0.shape[-1],), dtype='float32')  # (nTrl, nSamp, nY+1)
    for ti in range(Y.shape[0]):
        lastflash = None
        flashi_trli = stimTimes_ms[ti, :, 0]
        for fi, flash_time_ms in enumerate(flashi_trli):
            # find nearest sample time
            si = np.argmin(np.abs(samptimes - flash_time_ms))
            stimTimes_samp[ti, fi, 0] = si
            if lastflash is not None:  # hold until new values
                Y[ti, lastflash + 1:si, :] = Y[ti, lastflash, :]
            Y[ti, si, 0] = Ye0[ti, fi, 0]  # true info always 1st row
            Y[ti, si, 1:] = Ye[ti, fi, :]  # rest of the possibilities
            lastflash = si
    # for comparison...
    #print("{}".format(np.array(np.mean(stimTimes_samp, axis=0), dtype=int).ravel()))

    # preprocess -> ch-seln
    if chidx is not None:
        X = X[..., chidx]
        ch_names = ch_names[chidx]

    # trim to useful data range
    stimRng = (np.min(stimTimes_samp[:, 0, 0] + offset_ms[0] * fs / 1000),
               np.max(stimTimes_samp[:, -1, 0] + offset_ms[1] * fs / 1000))
    print("stimRng={}".format(stimRng))
    if 0 < stimRng[0] or stimRng[1] < X.shape[-2]:
        if verb > -1:
            print('Trimming range: {}-{}s'.format(stimRng[0] / fs, stimRng[-1] / fs))
        # limit to the useful data range
        rng = slice(int(max(0, stimRng[0])), int(min(X.shape[-2], stimRng[1])))
        X = X[..., rng, :]
        Y = Y[..., rng, :]
        if verb > 0:
            print("X={}".format(X.shape))
            print("Y={}".format(Y.shape))

    # preprocess -> spectral filter
    if stopband is not None:
        if verb > 0:
            print("preFilter: {}Hz".format(stopband))
        X, _, _ = butter_sosfilt(X, stopband, fs, order=order)

    # preprocess -> downsample
    resamprate = int(round(fs / ofs))
    if resamprate > 1:
        if verb > 0:
            print("resample: {}->{}hz rsrate={}".format(fs, ofs, resamprate))
        X = X[:, ::resamprate, :]  # decimate X (trl, samp, d)
        Y = Y[:, ::resamprate, :]  # decimate Y (trl, samp, y)
        fs = fs / resamprate

    # cut into sub-trials
    nsubtrials = X.shape[1] / fs / subtriallen if subtriallen is not None else 0
    if nsubtrials > 1:
        winsz = int(X.shape[1] // nsubtrials)
        print('{} subtrials -> winsz={}'.format(nsubtrials, winsz))
        # slice into sub-trials
        X = window_axis(X, axis=1, winsz=winsz, step=winsz)  # (trl, win, samp, d)
        Y = window_axis(Y, axis=1, winsz=winsz, step=winsz)  # (trl, win, samp, nY)
        # concatenate windows into trial dim
        X = X.reshape((X.shape[0] * X.shape[1],) + X.shape[2:])
        Y = Y.reshape((Y.shape[0] * Y.shape[1],) + Y.shape[2:])
        if verb > 0:
            print("X={}".format(X.shape))

    # make coords array for the meta-info about the dimensions of X
    coords = [None] * X.ndim
    coords[0] = {'name': 'trial', 'coords': np.arange(X.shape[0])}
    coords[1] = {'name': 'time', 'unit': 'ms', 'fs': fs,
                 'coords': np.arange(X.shape[1]) * 1000 / fs}
    coords[2] = {'name': 'channel', 'coords': ch_names}
    return (X, Y, coords)
def load_mindaffectBCI(datadir, sessdir=None, sessfn=None, ofs=60, stopband=((0, 1), (25, -1)), order=6, verb=0, iti_ms=1000, trlen_ms=None, offset_ms=(-1000, 1000)):
    # load the data file
    Xfn = datadir
    if sessdir:
        Xfn = os.path.join(Xfn, sessdir)
    if sessfn:
        Xfn = os.path.join(Xfn, sessfn)
    sessdir = os.path.dirname(Xfn)
    if verb > 1:
        print("Loading {}".format(Xfn))
    X, messages = read_mindaffectBCI_data_messages(Xfn)

    # strip the data time-stamp channel
    data_ts = X[..., -1]  # (nsamp,)
    X = X[..., :-1]  # (nsamp, nch)
    # estimate the sample rate from the data
    dur_s = (data_ts[-1] - data_ts[0]) / 1000.0
    fs = X.shape[0] / dur_s
    ch_names = None
    if verb > 0:
        print("X={} @{}Hz".format(X.shape, fs), flush=True)

    # extract the stimulus sequence
    Me, stim_ts, objIDs, _ = devent2stimSequence(messages)
    # up-sample to the data sample rate
    Y, stim_samp = upsample_stimseq(data_ts, Me, stim_ts, objIDs)
    if verb > 0:
        print("Y={} @{}Hz".format(Y.shape, fs), flush=True)

    # preprocess -> spectral filter, in continuous time!
    if stopband is not None:
        if verb > 0:
            print("preFilter: {}Hz".format(stopband))
        X, _, _ = butter_sosfilt(X, stopband, fs, order=order)

    # slice into trials
    # isi = interval *before* every stimulus --
    #       include data-start so the 1st stimulus is included
    isi = np.diff(np.concatenate((data_ts[0:1], stim_ts, data_ts[-2:-1]), axis=0))
    #print('isi={}'.format(isi))
    # get trial indices in stimulus messages as sufficiently large inter-stimulus gaps
    # N.B. this is the index in stim_ts of the *start* of the new trial
    trl_stim_idx = np.flatnonzero(isi > iti_ms)
    # get duration of stimulus in each trial
    trl_dur = stim_ts[trl_stim_idx[1:] - 1] - stim_ts[trl_stim_idx[:-1]]
    # estimate the best trial-length to use
    if trlen_ms is None:
        trlen_ms = np.median(trl_dur)
    # strip any trial too much shorter than trlen_ms (50%)
    keep = np.flatnonzero(trl_dur > trlen_ms * .5)
    # keep only the trial-start indices for the good trials
    trl_stim_idx = trl_stim_idx[keep]

    # compute the trial start/end relative to the trial-start
    trlen_samp = int(trlen_ms * fs / 1000)
    offset_samp = [int(o * fs / 1000) for o in offset_ms]
    bgnend_samp = (offset_samp[0], trlen_samp + offset_samp[1])  # start/end slice window
    xlen_samp = bgnend_samp[1] - bgnend_samp[0]
    # get the trial starts as indices & ms into the data array
    trl_samp_idx = stim_samp[trl_stim_idx]
    trl_ts = stim_ts[trl_stim_idx]

    # extract the slices
    Xraw = X.copy()
    Yraw = Y.copy()
    X = np.zeros((len(trl_samp_idx), xlen_samp, Xraw.shape[-1]), dtype=Xraw.dtype)  # (nTrl, nSamp, d)
    Y = np.zeros((len(trl_samp_idx), xlen_samp, Yraw.shape[-1]), dtype=Yraw.dtype)
    print("slicing {} trials =[{} - {}] samples @ {}Hz".format(len(trl_samp_idx), bgnend_samp[0], bgnend_samp[1], fs))
    for ti, si in enumerate(trl_samp_idx):
        idx = slice(si + bgnend_samp[0], si + bgnend_samp[1])
        X[ti, :, :] = Xraw[idx, :]
        Y[ti, :, :] = Yraw[idx, :]
    del Xraw, Yraw
    if verb > 0:
        print("X={}\nY={}".format(X.shape, Y.shape))

    # preprocess -> downsample
    #resamprate = int(round(fs/ofs))
    #if resamprate > 1:
    #    if verb > 0:
    #        print("resample by {}: {}->{}Hz".format(resamprate, fs, fs/resamprate))
    #    X = X[..., ::resamprate, :]  # decimate X (trl, samp, d)
    #    Y = Y[..., ::resamprate, :]  # decimate Y (OK as we latch Y)
    #    fs = fs/resamprate
    resamprate = round(2 * fs / ofs) / 2  # round to nearest .5
    if resamprate > 1:
        print("resample by {}: {}->{}Hz".format(resamprate, fs, fs / resamprate))
        # TODO []: use a better re-sampler, also in ONLINE
        idx = np.arange(0, X.shape[1], resamprate).astype(int)
        X = X[..., idx, :]  # decimate X (trl, samp, d)
        Y = Y[..., idx, :]  # decimate Y (OK as we latch Y)
        fs = fs / resamprate

    # make coords array for the meta-info about the dimensions of X
    coords = [None] * X.ndim
    coords[0] = {'name': 'trial', 'coords': trl_ts}
    coords[1] = {'name': 'time', 'unit': 'ms', 'fs': fs,
                 'coords': np.arange(X.shape[1]) * 1000 / fs}
    coords[2] = {'name': 'channel', 'coords': ch_names}
    # return data + metadata
    return (X, Y, coords)
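
# Usage sketch (illustrative only): the save-file path below is hypothetical.  Any offline
# mindaffectBCI save file in the format read by read_mindaffectBCI_data_messages should
# work; trials are sliced automatically from the inter-stimulus gaps and the trial length
# is estimated from the data when trlen_ms is None.
def _demo_load_mindaffectBCI(savefile='~/Downloads/mindaffectBCI_savefile.txt'):
    import os
    X, Y, coords = load_mindaffectBCI(os.path.expanduser(savefile), ofs=60,
                                      stopband=((0, 1), (25, -1)))
    # X: (nTrl, nSamp, d) EEG, Y: (nTrl, nSamp, nY) per-output stimulus sequences
    print("X={} Y={} fs={}".format(X.shape, Y.shape, coords[1]['fs']))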