def test_reptrack():
    """Smoke test: run reptrack on a 4 x 300000 array of standard-normal noise.

    There are no assertions; the test passes as long as reptrack does not raise.
    """
    section = 30000
    num_channels = 4
    num_timepoints = section * 10
    noise = np.random.normal(0, 1, (num_channels, num_timepoints))
    writemda32(noise, 'tmp_pre.mda')
    reptrack(
        timeseries='tmp_pre.mda',
        firings_out='tmp_firings.mda',
        detect_sign=1,
        section_size=section,
    )
def test_bandpass_filter():
    """Check that bandpass_filter reports success and keeps the array shape."""
    num_channels, num_timepoints = 12, 30000
    data = np.random.rand(num_channels, num_timepoints)
    writemda32(data, 'tmp.mda')

    ok = bandpass_filter(timeseries="tmp.mda", timeseries_out="tmp2.mda")
    assert ok

    original = readmda('tmp.mda')
    filtered = readmda('tmp2.mda')
    # Only shapes are compared here; the filtered values themselves are not
    # checked against the input.
    assert original.shape == filtered.shape
    assert data.shape == filtered.shape
    return True
def test_normalize_channels():
    """Compare normalize_channels output with a per-channel z-score reference.

    The reference subtracts each channel's mean and divides by its sample
    standard deviation (ddof=1); agreement is required to 5 decimals.
    """
    num_channels, num_timepoints = 4, 1000
    data = np.random.rand(num_channels, num_timepoints)
    writemda32(data, 'tmp.mda')

    ok = normalize_channels(timeseries="tmp.mda", timeseries_out="tmp2.mda")
    assert ok

    raw = readmda('tmp.mda')
    normalized = readmda('tmp2.mda')

    # Column vectors (M x 1) broadcast across the N timepoints of each channel.
    channel_mean = np.reshape(np.mean(raw, axis=1), (num_channels, 1))
    channel_stdev = np.reshape(np.sqrt(np.var(raw, axis=1, ddof=1)), (num_channels, 1))
    expected = (raw - channel_mean) / channel_stdev

    np.testing.assert_array_almost_equal(expected, normalized, decimal=5)
    return True
def test_compute_templates():
    """Check that compute_templates succeeds and writes an M x T x K array."""
    num_channels, num_timepoints, num_clusters = 5, 1000, 6
    clip_size, num_events = 50, 100

    data = np.random.rand(num_channels, num_timepoints)
    writemda32(data, 'tmp.mda')

    # Firings array: row 1 holds random timestamps in 1..N, row 2 holds random
    # labels in 1..K; row 0 stays zero.
    firings = np.zeros((3, num_events))
    firings[1, :] = 1 + np.random.randint(num_timepoints, size=(1, num_events))
    firings[2, :] = 1 + np.random.randint(num_clusters, size=(1, num_events))
    writemda64(firings, 'tmp2.mda')

    ok = compute_templates(
        timeseries='tmp.mda',
        firings='tmp2.mda',
        templates_out='tmp3.mda',
        clip_size=clip_size,
    )
    assert ok

    templates = readmda('tmp3.mda')
    assert templates.shape == (num_channels, clip_size, num_clusters)
    return True
def test_extract_clips():
    """Check extract_clips output shape and the contents of one clip.

    Random float32 data and random timestamps (kept at least 200 samples away
    from either end of the timeseries) are written to disk, clips are
    extracted, and clip number 10 is compared with the matching window of the
    input.
    """
    M, T, L, N = 5, 100, 100, 1000
    X = np.random.rand(M, N).astype(np.float32)
    writemda32(X, 'tmp.mda')

    F = np.zeros((2, L))
    F[1, :] = 200 + np.random.randint(N - 400, size=(1, L))
    writemda64(F, 'tmp2.mda')

    ret = extract_clips(timeseries='tmp.mda', firings='tmp2.mda', clips_out='tmp3.mda', clip_size=T)
    assert ret

    clips0 = readmda('tmp3.mda')
    assert clips0.shape == (M, T, L)

    # Expected window for event 10: T samples starting `a` samples before the
    # timestamp.
    t0 = int(F[1, 10])
    a = int(np.floor((T + 1) / 2 - 1))
    # The comparison result used to be discarded (a bare np.array_equal call),
    # so the clip contents were never actually verified. Assert it instead.
    np.testing.assert_array_equal(clips0[:, :, 10], X[:, t0 - a:t0 - a + T])
    return True
def compute_templates(*, timeseries, firings, templates_out, clip_size=100):
    """
    Compute templates (average waveforms) for the clusters given by the labeled events in firings.

    Parameters
    ----------
    timeseries : INPUT
        Path of the timeseries mda file (MxN) that the event clips (snippets) are taken from. M is the number of channels, N is the number of timepoints.
    firings : INPUT
        Path of the firings mda file (RxL), with R>=3 and L the number of events. The second row holds timestamps, the third row holds integer labels.
    templates_out : OUTPUT
        Path of the output mda file (MxTxK). T=clip_size, K=maximum cluster label. Empty clusters yield an all-zero template.
    clip_size : int
        (Optional) clip size, aka snippet size, i.e. the number of timepoints in a single template
    """
    helper_args = dict(timeseries=timeseries, firings=firings, clip_size=clip_size)
    averaged = compute_templates_helper(**helper_args)
    # The result of the 32-bit mda write is handed straight back to the caller.
    write_result = writemda32(averaged, templates_out)
    return write_result
def extract_clips(*, timeseries, firings, clips_out, clip_size=100):
    """
    Extract the clips that correspond to spike events

    Parameters
    ----------
    timeseries : INPUT
        Path of the timeseries mda file (MxN) that the event clips (snippets) are taken from
    firings : INPUT
        Path of the firings mda file (RxL), with R>=2 and L the number of events. The second row holds timestamps.
    clips_out : OUTPUT
        Path of the clips mda file (MxTxL). T=clip_size
    clip_size : int
        (Optional) clip size, aka snippet size, aka the number of timepoints in a single clip
    """
    # Only the timestamp row (second row) of the firings file is needed.
    event_times = readmda(firings)[1, :]
    extracted = extract_clips_helper(
        timeseries=timeseries,
        times=event_times,
        clip_size=clip_size,
    )
    # The result of the 32-bit mda write is handed straight back to the caller.
    write_result = writemda32(extracted, clips_out)
    return write_result
def anneal_segments(*, timeseries_list, firings_list, firings_out, dmatrix_out='', k1_dmatrix_out='', k2_dmatrix_out='', dmatrix_templates_out='', time_offsets):
    """
    Combine a list of firings files to form a single firings file

    Link firings labels to first firings.mda, all other firings labels are incremented

    Parameters
    ----------
    timeseries_list : INPUT
        A list of paths of timeseries mda files to be used for drift adjustment / time offsets
    firings_list : INPUT
        A list of paths of firings mda files to be concatenated/drift adjusted
    firings_out : OUTPUT
        The output firings
    dmatrix_out : OUTPUT
        The distance matrix used
    k1_dmatrix_out : OUTPUT
        The mean distances of k1 templates to k1 spikes
    k2_dmatrix_out : OUTPUT
        The mean distances of k2 templates to k2 spikes
    dmatrix_templates_out : OUTPUT
        The templates used to compute the distance matrix
        ...
    time_offsets : string
        An array of time offsets for each firings file. Expect one offset for each firings file.
        ...
    """
    print('timeseries_list' + str(timeseries_list))
    print('firings_list' + str(firings_list))
    print('firings_out' + str(firings_out))
    print('time_offsets ' + str(time_offsets))

    if time_offsets:
        # Comma-separated string -> float array. np.float_ was removed in
        # NumPy 2.0; np.float64 is the same dtype on every NumPy version.
        time_offsets = np.fromstring(time_offsets, dtype=np.float64, sep=',')
    else:
        print(
            'No time offsets provided - assuming zero time gap/continuously recorded data'
        )
        time_offsets = np.zeros(len(timeseries_list))
        # Each offset is the previous offset plus the length (N2) of the
        # preceding timeseries; the first one is left as zero.
        for segment in range(len(timeseries_list) - 1):
            X = DiskReadMda(timeseries_list[segment])
            time_offsets[segment + 1] = time_offsets[segment] + X.N2()

    concatenated_firings = concat_and_increment(firings_list, time_offsets)

    (dmatrix, k1_dmatrix, k2_dmatrix, templates, Kmaxes) = get_dmatrix_templates(timeseries_list, firings_list)

    # NaNs are first mapped to -1 so the `< 0` comparison below does not hit a
    # runtime warning; every negative entry (no comparison available) is then
    # turned into NaN in all three matrices.
    dmatrix[np.isnan(dmatrix)] = -1
    no_comparison = dmatrix < 0
    k1_dmatrix[no_comparison] = np.nan
    k2_dmatrix[no_comparison] = np.nan
    dmatrix[no_comparison] = np.nan

    #TODO: Improve join function
    pairs_to_merge = get_join_matrix(dmatrix, k1_dmatrix, templates, Kmaxes)  # Returns with base 1 adjustment

    pairs_to_merge = np.reshape(pairs_to_merge, (-1, 2))
    # Eliminate all rows with NaN
    pairs_to_merge = pairs_to_merge[~np.isnan(pairs_to_merge).any(axis=1)]
    # Assure that input is sorted by the first column
    pairs_to_merge = pairs_to_merge[np.argsort(pairs_to_merge[:, 0])]

    # Propagate merge pairs to the lowest label number: whenever a label that
    # is merged away (column 1) also appears as a merge target (column 0),
    # redirect that target to the current row's target. Relies on the sort above.
    for idx, label in enumerate(pairs_to_merge[:, 1]):
        pairs_to_merge[np.isin(pairs_to_merge[:, 0], label), 0] = pairs_to_merge[idx, 0]

    # Merge firing labels: events labeled with column 1 are relabeled to
    # column 0 (already base 1 corrected).
    for keep_label, merged_label in pairs_to_merge:
        concatenated_firings[2, np.isin(concatenated_firings[2, :], merged_label)] = keep_label

    # The diagnostic outputs default to '' in the signature; skip the write
    # when no path was supplied instead of attempting to write to an empty path.
    if dmatrix_out:
        writemda64(dmatrix, dmatrix_out)
    if dmatrix_templates_out:
        writemda32(templates, dmatrix_templates_out)
    if k1_dmatrix_out:
        writemda64(k1_dmatrix, k1_dmatrix_out)
    if k2_dmatrix_out:
        writemda64(k2_dmatrix, k2_dmatrix_out)

    # Write the combined firings; the result of the write is returned.
    return writemda64(concatenated_firings, firings_out)
def synthesize_random_waveforms(*, waveforms_out=None, geometry_out=None, M=5, T=500, K=20, upsamplefac=13):
    """
    Synthesize random waveforms for use in creating a synthetic timeseries dataset

    Parameters
    ----------
    waveforms_out : OUTPUT
        Path to waveforms mda file. Mx(T*upsamplefac)xK
    geometry_out : OUTPUT
        (Optional) Path to geometry csv file
    M : int
        (Optional) Number of channels
    T : int
        (Optional) Number of timepoints for a waveform, before upsampling
    K : int
        (Optional) Number of waveforms to synthesize
    upsamplefac : int
        (Optional) used for upsampling the waveforms to avoid discretization artifacts
    """
    # Four-element shape parameters; after random jitter they are passed to
    # synthesize_single_waveform as `durations` and `amps`.
    avg_durations = np.array([200, 10, 30, 200])
    avg_amps = np.array([0.5, 10, -1, 0])
    rand_durations_stdev = np.array([10, 4, 6, 20])
    rand_amps_stdev = np.array([0.2, 3, 0.5, 0])
    amp_factor_low, amp_factor_high = 0.5, 1.0
    geom_spread_coef1 = 0.2
    geom_spread_coef2 = 1
    average_peak_amplitude = 10
    timeshift_factor = 3

    # Default geometry: 2 x M, first row 1..M, second row all zeros.
    geometry = np.zeros((2, M))
    geometry[0, :] = np.arange(1, M + 1)

    neuron_locations = get_default_neuron_locations(M, K, geometry)

    ## The waveforms_out
    num_samples = T * upsamplefac
    WW = np.zeros((M, num_samples, K))

    for unit in range(K):
        for channel in range(M):
            offset = neuron_locations[:, unit] - geometry[:, channel]
            dist = np.sqrt(np.sum(offset**2))

            # Jittered durations are floored at 1 before being scaled by the
            # upsampling factor.
            jittered_durations = avg_durations + np.random.randn(1, 4) * rand_durations_stdev
            durations0 = np.maximum(np.ones(avg_durations.shape), jittered_durations) * upsamplefac
            amps0 = avg_amps + np.random.randn(1, 4) * rand_amps_stdev

            waveform0 = synthesize_single_waveform(N=num_samples, durations=durations0, amps=amps0)
            # Circular shift and attenuation both grow with the
            # neuron-to-channel distance.
            shift = int(timeshift_factor * dist * upsamplefac)
            waveform0 = np.roll(waveform0, shift)
            waveform0 = waveform0 * np.random.uniform(amp_factor_low, amp_factor_high)
            WW[channel, :, unit] = waveform0 / (geom_spread_coef1 + dist * geom_spread_coef2)

    # Rescale so the mean over units of each unit's peak absolute amplitude
    # equals average_peak_amplitude.
    peaks = np.max(np.abs(WW), axis=(0, 1))
    WW = WW / np.mean(peaks) * average_peak_amplitude

    # Without an output path the arrays themselves are returned.
    if not waveforms_out:
        return (WW, geometry)

    writemda32(WW, waveforms_out)
    if geometry_out:
        np.savetxt(geometry_out, geometry.transpose(), delimiter=",", fmt="%g")
    return True
def synthesize_timeseries(*, firings='', waveforms='', timeseries_out=None, noise_level=1, samplerate=30000, duration=60, waveform_upsamplefac=1, amplitudes_row=0):
    """
    Synthesize an electrophysiology timeseries from a set of ground-truth firing events and waveforms

    Parameters
    ----------
    firings : INPUT
        (Optional) The path of firing events file in .mda format. RxL where R>=3 and L is the number of events. Second row is the timestamps, third row is the integer labels. An already-loaded array may be passed instead of a path.
    waveforms : INPUT
        (Optional) The path of (possibly upsampled) waveforms file in .mda format. Mx(T*waveform_upsamplefac)xK, where M is the number of channels, T is the clip size, and K is the number of units. An already-loaded array may be passed instead of a path.
    timeseries_out : OUTPUT
        The output path for the new timeseries. MxN
    noise_level : double
        (Optional) Standard deviation of the simulated background noise added to the timeseries
    samplerate : double
        (Optional) Sample rate for the synthetic dataset in Hz
    duration : double
        (Optional) Duration of the synthetic dataset in seconds. The number of timepoints will be duration*samplerate. If that product is zero, the length is derived from the latest event time instead.
    waveform_upsamplefac : int
        (Optional) The upsampling factor corresponding to the input waveforms. (avoids digitization artifacts)
    amplitudes_row : int
        (Optional) If positive, this is the (1-based) row in the firings arrays where the amplitude scale factors are found. Otherwise, use all 1's
    """
    num_timepoints = np.int64(samplerate * duration)
    waveform_upsamplefac = int(waveform_upsamplefac)

    # A string is a path ('' meaning "none given"); anything else is used as
    # the array itself.
    if isinstance(waveforms, str):
        if waveforms:
            W = readmda(waveforms)
        else:
            W = np.zeros((4, 100 * waveform_upsamplefac, 0))
    else:
        W = waveforms

    if isinstance(firings, str):
        if firings:
            F = readmda(firings)
        else:
            F = np.zeros((3, 0))
    else:
        F = firings

    times = F[1, :]
    labels = F[2, :].astype('int')

    M, TT, K = W.shape[0], W.shape[1], W.shape[2]
    T = int(TT / waveform_upsamplefac)
    Tmid = int(np.ceil((T + 1) / 2 - 1))

    N = num_timepoints
    if N == 0:
        if times.size == 0:
            N = T
        else:
            # Timestamps are floats; the array length must be an integer or
            # np.random.randn raises TypeError.
            N = int(np.floor(np.max(times))) + T

    X = np.random.randn(M, N) * noise_level

    for j in range(times.size):
        t0 = times[j]
        k0 = labels[j]
        amp0 = 1
        if amplitudes_row > 0:
            amp0 = F[amplitudes_row - 1, j]
        # Labels are 1-based: label k uses W[:, :, k - 1]. (The earlier lookup
        # table was built from W[:, :, k - 1] for k in range(K), which shifted
        # every unit by one and gave label 1 the last waveform.)
        # NOTE(review): a label of 0 wraps around to the last waveform via
        # negative indexing - confirm whether such labels can occur.
        waveform0 = W[:, :, k0 - 1]
        # The fractional part of the timestamp selects the phase of the
        # upsampled waveform.
        frac_offset = int(np.floor((t0 - np.floor(t0)) * waveform_upsamplefac))
        tstart = np.int64(np.floor(t0)) - Tmid
        # Events whose clip would extend past either end are skipped.
        if (0 <= tstart) and (tstart + T <= N):
            X[:, tstart:tstart + T] = X[:, tstart:tstart + T] + waveform0[:, frac_offset::waveform_upsamplefac] * amp0

    # With an output path the result of the mda write is returned; otherwise
    # the MxN array itself.
    if timeseries_out:
        return writemda32(X, timeseries_out)
    else:
        return X