def filtfiltlong(finname, foutname, fmt, b, a, buffer_len=100000, overlap_len=100, max_len=-1):
    """Use memmap and chunking to filter continuous data stored in a file.

    Inputs:
      finname     - name of the input file; memory-mapped read-only with dtype fmt
      foutname    - name of the output file; created (or overwritten) with dtype fmt
      fmt         - data format eg 'i'
      b,a         - filter coefficients, passed unchanged to filtfilt
      buffer_len  - how much data to process at a time
      overlap_len - how much data we add on each side of a buffer (clipped at the
                    ends of the data) to smooth out filter transients
      max_len     - how many samples to process. If set to -1, or to more samples
                    than the input holds, the whole file is processed

    Outputs:
      y - The memmapped array pointing to the written file

    Notes on algorithm:
      1. The arrays are memmapped, so we let pylab (numpy) take care of handling
         large arrays
      2. The filtering is done in chunks:

      Chunking details:

                    |<------- b1 ------->||<------- b2 ------->|
        -----[------*--------------{-----*------]--------------*------}----------
             |<-------------- c1 -------------->|
                                   |<-------------- c2 -------------->|

      From the array of data we cut out contiguous buffers (b1,b2,...) and to each
      buffer we add some extra overlap to make chunks (c1,c2). The overlap helps to
      remove the transients from the filtering which would otherwise appear at each
      buffer boundary. Only the buffer part of each filtered chunk is written out.
    """
    x = pylab.memmap(finname, dtype=fmt, mode='r')
    # Never ask for more samples than exist: the chunk read from x would be
    # shorter than the output slice it has to fill.
    if max_len == -1 or max_len > x.size:
        max_len = x.size
    y = pylab.memmap(foutname, dtype=fmt, mode='w+', shape=max_len)

    # range (not xrange) so the loop runs on both Python 2 and Python 3
    for buff_st_idx in range(0, max_len, buffer_len):
        buff_nd_idx = min(max_len, buff_st_idx + buffer_len)
        # Chunk = buffer plus overlap on both sides, clipped to the available data
        chk_st_idx = max(0, buff_st_idx - overlap_len)
        chk_nd_idx = min(x.size, buff_nd_idx + overlap_len)
        # Position of the buffer inside its chunk
        rel_st_idx = buff_st_idx - chk_st_idx
        rel_nd_idx = buff_nd_idx - chk_st_idx
        this_y_chk = filtfilt(b, a, x[chk_st_idx:chk_nd_idx])
        y[buff_st_idx:buff_nd_idx] = this_y_chk[rel_st_idx:rel_nd_idx]

    # Push the result to disk so the file is complete even while y is still alive
    y.flush()
    return y
def read_all_csc(data_folder, dtype='int16', assume_same_fs=True, memmap=False, memmap_folder=None,
                 save_for_spikedetekt=False, channels_to_save=None, return_sliced_data=False):
    """Read every '.ncs' file of a folder into one (n_files x n_samples) array.

    Files are ordered by the integer that follows 'CSC' in their name and each
    file fills one row of the result, in that order.

    Inputs:
      data_folder          - folder holding the '...CSC<number>.ncs' files
      dtype                - dtype of the returned (and saved) array
      assume_same_fs       - passed to read_single_csc. When that function returns a
                             'trace' it is used as the row data, otherwise the raw
                             packet samples are concatenated
      memmap               - if True the result is a memmap backed by
                             '<memmap_folder>/<name of data_folder>.dat' and the
                             input files are read through memmaps as well
      memmap_folder        - folder for the memmap file and for the spikedetekt
                             file. Required if memmap or save_for_spikedetekt is set
      save_for_spikedetekt - if True also write the data, transposed so that the
                             samples are the slow axis, to
                             '<memmap_folder>/spikedetekt_<name of data_folder>.dat'
      channels_to_save     - optional row indices; restricts what is written to the
                             spikedetekt file
      return_sliced_data   - if True (and channels_to_save was used for saving)
                             return only the selected rows instead of all of them

    Outputs:
      The data array (memmap or in-memory array), or None if the folder holds no
      '.ncs' files.

    Raises:
      NameError if a memmap_folder is needed but was not given.
    """
    def channel_number(path):
        # Parse the file name only, so that dots or 'CSC' in the folder path
        # cannot corrupt the channel number
        return int(os.path.basename(path).split('.')[0].split('CSC')[1])

    files = [os.path.join(data_folder, f) for f in os.listdir(data_folder) if f.endswith('.ncs')]
    ordered_files = sorted(files, key=channel_number)

    if memmap and not memmap_folder:
        raise NameError("A memmap_folder should be defined for memmapped data")
    if save_for_spikedetekt and not memmap_folder:
        raise NameError("A memmap_folder should be defined to save data for spikedetekt")

    # Name the outputs after the last component of data_folder. os.path copes with
    # either separator and with a trailing one, on every operating system.
    out_filename = os.path.basename(os.path.normpath(data_folder)) + '.dat'
    out_full_filename = os.path.join(memmap_folder, out_filename) if memmap_folder else None

    n_channels = len(ordered_files)
    data = None
    for i, ncs_file in enumerate(ordered_files):
        # Binary mode on every Python version; the handle is closed once the
        # samples have been copied into data
        with open(ncs_file, mode='rb') as fin:
            x = read_single_csc(fin, assume_same_fs=assume_same_fs, memmap=memmap)
            if 'trace' in x:
                channel_data = x['trace']
            else:
                # No trace was built (memmap read, assume_same_fs off, or the
                # reader gave up on it): fall back to the raw packet samples
                channel_data = x['packets']['samp'].ravel()
            if data is None:
                shape = (n_channels, channel_data.size)
                if memmap:
                    data = pylab.memmap(out_full_filename, dtype=dtype, mode='w+', shape=shape)
                else:
                    data = pylab.zeros(shape=shape, dtype=dtype)
            # Every file, the first one included, fills its own row
            data[i, :] = channel_data
        if memmap:
            data.flush()
        print(i + 1)

    if data is None:
        return None

    data_to_return = data
    if save_for_spikedetekt:
        if channels_to_save is not None and len(channels_to_save) > 0:
            data2 = data[channels_to_save, :]
            if return_sliced_data:
                data_to_return = data2
        else:
            data2 = data
        filename = os.path.join(memmap_folder, 'spikedetekt_' + out_filename)
        # tofile writes in C order, so transposing first makes samples the slow axis
        pylab.transpose(data2).astype(dtype).tofile(filename)
    return data_to_return
def read_extracted_data(fname, type='addata'):
    """Memory-map a data file extracted by extract_nrd.

    Inputs:
      fname - name of the file we want to read.
      type  - kind of data stored in the file. Has to be one of
              'ts'     - time stamps, uint64, in microseconds
              'ttl'    - the parallel port input, uint32
              'addata' - the continuous A/D channel data, int32

    Output:
      data - read-only memmap of the matching type, or None (after logging an
             error) if type is not one of the names above
    """
    known_formats = (('ts', 'Q'), ('ttl', 'I'), ('addata', 'i'))
    for label, fmt in known_formats:
        if type == label:
            return pylab.memmap(fname, dtype=fmt, mode='r')

    logger.error('Unrecognized data type {:s}'.format(type))
    return None
#----------Data generation-----------------
# Read every CSC channel into one memmapped (channels x samples) array and keep
# a .npy copy of it.
data = lio.read_all_csc(folder, assume_same_fs=False, memmap=True, memmap_folder=memap_folder,
                        save_for_spikedetekt=False, channels_to_save=None, return_sliced_data=False)
pl.save(os.path.join(memap_folder, 'B14R9_raw.npy'), data)

# The first 64 rows and the remaining rows are processed separately
# (presumably ECoG and probe channels, going by the names).
data_ecog = data[:64, :]
data_probe = data[64:, :]

# High-pass filter the probe rows one at a time into a disk-backed array.
data_probe_hp = pl.memmap(os.path.join(memap_folder, 'data_probe_hp.dat'), dtype='int16', mode='w+',
                          shape=pl.shape(data_probe))
for i in range(pl.shape(data_probe)[0]):
    data_probe_hp[i, :] = filters.high_pass_filter(data_probe[i, :], Fsampling=f_sampling, Fcutoff=f_hp_cutoff)
    data_probe_hp.flush()
    print(i)
pl.save(os.path.join(memap_folder, 'data_probe_hp.npy'), data_probe_hp)

# Disk-backed array for the sub-sampled ECoG rows. The length has to be an
# integer (a plain '/' gives a float on Python 3, which memmap rejects) and is
# rounded up so that it can hold a signal reduced by keeping every
# decimation_factor-th sample.
# NOTE(review): confirm the rounding against the code that fills this array.
decimation_factor = int(f_sampling / f_subsample)
shape_data_ss = (pl.shape(data_ecog)[0], -(-pl.shape(data_ecog)[1] // decimation_factor))
data_ecog_lp_ss = pl.memmap(os.path.join(memap_folder, 'data_ecog_lp_ss.dat'), dtype='int16', mode='w+',
                            shape=shape_data_ss)
# Previously computed results, loaded fully into memory.
phases_all_shaftA = pl.load(os.path.join(memap_folder, 'phases_all_shaftA.npy'), mmap_mode=None)
phases_all_shaftC = pl.load(os.path.join(memap_folder, 'phases_all_shaftC.npy'), mmap_mode=None)

# NOTE(review): this memory-mapped load is replaced by the freshly generated
# array a few lines below; only one of the two is needed in a given run.
data = pl.load(os.path.join(memap_folder, 'B14R9_raw.npy'), mmap_mode='r+')

#----------Data generation-----------------
# Read every CSC channel into one memmapped (channels x samples) array and keep
# a .npy copy of it.
data = lio.read_all_csc(folder, assume_same_fs=False, memmap=True, memmap_folder=memap_folder,
                        save_for_spikedetekt=False, channels_to_save=None, return_sliced_data=False)
pl.save(os.path.join(memap_folder, 'B14R9_raw.npy'), data)

# The first 64 rows and the remaining rows are processed separately
# (presumably ECoG and probe channels, going by the names).
data_ecog = data[:64, :]
data_probe = data[64:, :]

# High-pass filter the probe rows one at a time into a disk-backed array.
data_probe_hp = pl.memmap(os.path.join(memap_folder, 'data_probe_hp.dat'), dtype='int16', mode='w+',
                          shape=pl.shape(data_probe))
for i in range(pl.shape(data_probe)[0]):
    data_probe_hp[i, :] = filters.high_pass_filter(data_probe[i, :], Fsampling=f_sampling, Fcutoff=f_hp_cutoff)
    data_probe_hp.flush()
    print(i)
pl.save(os.path.join(memap_folder, 'data_probe_hp.npy'), data_probe_hp)

# Low-pass filter and decimate the ECoG rows. decimate keeps every
# decimation_factor-th sample, so its output is ceil(n / factor) long: the
# destination is sized with an integer ceiling division (a plain '/' gives a
# float on Python 3 and a too-short row whenever n is not a multiple of the
# factor).
decimation_factor = int(f_sampling / f_subsample)
shape_data_ss = (pl.shape(data_ecog)[0], -(-pl.shape(data_ecog)[1] // decimation_factor))
data_ecog_lp_ss = pl.memmap(os.path.join(memap_folder, 'data_ecog_lp_ss.dat'), dtype='int16', mode='w+',
                            shape=shape_data_ss)
for i in range(pl.shape(data_ecog)[0]):
    low_passed = filters.low_pass_filter(data_ecog[i, :], Fsampling=f_sampling, Fcutoff=f_lp_cutoff)
    data_ecog_lp_ss[i, :] = signal.decimate(low_passed, decimation_factor)
    data_ecog_lp_ss.flush()
    print(i)
pl.save(os.path.join(memap_folder, 'data_ecog_lp_ss.npy'), data_ecog_lp_ss)
def read_single_csc(fin, assume_same_fs=True, memmap=False):
    """Read a continuous record file.

    We return the raw packets but, in addition, if we set assume_same_fs as true
    we return a trace with all the data concatenated together, assuming that a
    constant sampling frequency was maintained through out. Gaps in the record
    are padded with zeros.

    Input:
      fin            - file handle
      assume_same_fs - if True, concatenate any segments together, fill time gaps
                       with zeros and return average Fs
      memmap         - if True the packets are memory-mapped instead of read into
                       memory, and only 'header' and 'packets' are returned

    Output:
      Dictionary with fields
        'header'  - the file header
        'packets' - the actual packets as read. This is a new pylab dtype with fields:
            'timestamp' - timestamp (us)
            'chan'      - channel
            'Fs'        - the sampling frequency
            'Ns'        - the number of valid samples in the packet
            'samp'      - the samples in the packet.
          e.g. x['packets']['samp'] will return a 2D array, number of packets long
          and 512 wide (since each packet carries 512 wave points); similarly
          x['packets']['timestamp'] will return an array number of packets long
        'Fs'    - the average frequency computed from the timestamps (can differ
                  from the nominal frequency the device reports)
        'trace' - the concatenated data from all the packets, in the dtype of the
                  samples
        't0'    - the timestamp of the first packet.

      'Fs', 'trace' and 't0' are only present when a trace was built, i.e. when
      assume_same_fs is True, memmap is False, the record is not empty and the
      frequency stored in the packets does not change.

    NOTE: while 'packets' returns the exact packets read, 'Fs' and 'trace' assume
    that the sampling frequency has not changed during the recording.
    """
    hdr = read_header(fin)
    csc_packet = pylab.dtype([
        ('timestamp', 'Q'),
        ('chan', 'I'),
        ('Fs', 'I'),
        ('Ns', 'I'),
        ('samp', '512h')
    ])

    if memmap:
        # A memory-mapped read never builds a trace, so return straight away
        # instead of scanning the Fs column of the whole file first.
        data = pylab.memmap(fin, dtype=csc_packet, mode='r', offset=HEADER_BYTE_SIZE)
        return {'header': hdr, 'packets': data}

    data = pylab.fromfile(fin, dtype=csc_packet, count=-1)
    # No trace for an empty record, or when Fs is not fixed across the record
    # (in which case the packets can not be packed together).
    if not assume_same_fs or data.size == 0 or data['Fs'].std() > 1e-6:
        return {'header': hdr, 'packets': data}

    nominal_Fs = float(data['Fs'][0])
    sample_duration_us = (1. / nominal_Fs) * 1e6
    # For the version we are dealing with, Neuralynx packets are always 512
    # samples. The nominal Fs is a poor estimate at low sampling frequencies
    # (it is rounded to the nearest Hz) and 512 samples come out 1 or 2 us
    # shorter than the measured dt at high ones, so a pause is only assumed when
    # the step between two packets exceeds one extra sample, i.e. 513 samples.
    packet_duration_us = 513 * sample_duration_us

    samp = data['samp']
    ts_us = data['timestamp']
    Ns = data['Ns']
    dt_us = pylab.diff(ts_us).astype('f')
    # Indexes of the packets that are followed by a pause in the recording
    # (flatnonzero is what the old pylab.find did).
    idx = pylab.flatnonzero(dt_us > packet_duration_us)

    if idx.size == 0:
        # No padding needed
        trace = samp.ravel()
        if dt_us.size:
            Fs = (Ns[:-1] / (dt_us * 1e-6)).mean()
        else:
            # A single packet gives no time step to estimate from
            Fs = nominal_Fs
    else:
        # We have some padding to do.
        logger.debug('Gaps in record, padding')
        # Our first task is to find all the contiguous sections of data:
        # shift the indexes to point at the packets that come after a gap, then
        # add the first packet and the end of the record, so that consecutive
        # entries of idx bound one contiguous section each.
        idx = pylab.append(pylab.insert(idx + 1, 0, 0), ts_us.size)

        estimFs_sum = 0
        N_samps = 0
        sections = []
        for n in range(idx.size - 1):
            n0, n1 = idx[n], idx[n + 1]
            sections.append(samp[n0:n1].ravel())
            if n1 - n0 > 1:
                # We need more than one packet in a section to get an estimate
                estimFs_sum += (Ns[n0:n1 - 1] / (dt_us[n0:n1 - 1] * 1e-6)).sum()
                N_samps += n1 - 1 - n0
        # If every section is a single packet there is nothing to average
        Fs = estimFs_sum / float(N_samps) if N_samps > 0 else nominal_Fs

        # Now pad the data appropriately
        padded = [sections[0]]
        cum_N = sections[0].size
        for n in range(1, len(sections)):
            # Zeros needed for this section to start at the sample its timestamp
            # calls for; never negative, in case the Fs estimate is slightly off
            Npad = max(0, int((ts_us[idx[n]] - ts_us[0]) * 1e-6 * Fs - cum_N))
            # Pad in the sample dtype so the trace has the same dtype with or
            # without gaps
            padded.append(pylab.zeros(Npad, dtype=samp.dtype))
            padded.append(sections[n])
            cum_N += Npad + sections[n].size
        trace = pylab.concatenate(padded)

    return {'header': hdr, 'packets': data, 'Fs': Fs, 'trace': trace, 't0': ts_us[0]}