# Assumes numpy (as np) and the mountainlab helpers used throughout this module
# (DiskReadMda, DiskWriteMda, readmda, writemda64, TimeseriesChunkReader, cpp, ...)
# are imported at the top of the file.


def compute_dmatrix(timeseries1, timeseries2, F1, F2, *, clip_size):
    X = DiskReadMda(timeseries1)
    M = X.N1()
    # Compare the tail of segment 1 against the head of segment 2
    F1b = get_last_events(F1, 100)
    F2b = get_first_events(F2, 100)
    times1 = F1b[1, :].ravel()
    labels1 = F1b[2, :].ravel()
    clips1 = extract_clips_helper(timeseries=timeseries1, times=times1,
                                  clip_size=clip_size)
    times2 = F2b[1, :].ravel()
    labels2 = F2b[2, :].ravel()
    clips2 = extract_clips_helper(timeseries=timeseries2, times=times2,
                                  clip_size=clip_size)

    K1 = int(max(labels1))
    K2 = int(max(labels2))
    dmatrix = np.zeros((K1, K2))
    templates1 = np.zeros((M, clip_size, K1))
    templates2 = np.zeros((M, clip_size, K2))
    for k1 in range(1, K1 + 1):
        inds_k1 = np.where(labels1 == k1)[0]
        clips1_k1 = clips1[:, :, inds_k1]
        templates1[:, :, k1 - 1] = np.mean(clips1_k1, axis=2)
        for k2 in range(1, K2 + 1):
            inds_k2 = np.where(labels2 == k2)[0]
            clips2_k2 = clips2[:, :, inds_k2]
            templates2[:, :, k2 - 1] = np.mean(clips2_k2, axis=2)
            dmatrix[k1 - 1, k2 - 1] = compute_distance_between_clusters(
                clips1_k1, clips2_k2)
    return (dmatrix, templates1, templates2)
def bandpass_filter(*, timeseries, timeseries_out, samplerate=30000,
                    freq_min=300, freq_max=6000, freq_wid=1000):
    """
    Apply a bandpass filter to a timeseries dataset

    Parameters
    ----------
    timeseries : INPUT
        Path of timeseries, MxN where M is number of channels and N number of timepoints, in .mda format
    timeseries_out : OUTPUT
        Path of output timeseries in .mda format
    samplerate : double
        (Optional) Sampling rate of input timeseries in Hz
    freq_min : double
        (Optional) Lower edge of frequency band
    freq_max : double
        (Optional) Upper edge of frequency band
    freq_wid : double
        (Optional) A parameter that controls the sharpness of the band edge transition
    """
    X = DiskReadMda(timeseries)
    M, N = X.N1(), X.N2()
    _writer = DiskWriteMda(timeseries_out, [M, N], dt='float32')
    chunk_size_mb = 100
    overlap_size = 100000

    def _kernel(chunk, info):
        print('Processing chunk --- (%g%%)...' % (np.floor(info.t1 / N * 100)))
        chunk = chunk.astype('float32', copy=False)
        cpp.bandpass_filter(chunk, samplerate, freq_min, freq_max, freq_wid)
        # Write only the non-overlap portion of the filtered chunk
        return _writer.writeChunk(chunk[:, info.t1a:info.t2a + 1], i1=0,
                                  i2=info.t1)

    TCR = TimeseriesChunkReader(chunk_size_mb=chunk_size_mb,
                                overlap_size=overlap_size)
    return TCR.run(timeseries, _kernel)
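
# A minimal usage sketch for bandpass_filter, assuming 'raw.mda' and
# 'filt.mda' are placeholder paths to an existing recording and the desired
# output, not files shipped with this module.
def _example_bandpass_usage():
    return bandpass_filter(timeseries='raw.mda', timeseries_out='filt.mda',
                           samplerate=30000, freq_min=300, freq_max=6000)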
def get_dmatrix_templates(timeseries_list, firings_list):
    X = DiskReadMda(timeseries_list[0])
    M = X.N1()
    clip_size = 50
    num_segments = len(timeseries_list)
    firings_arrays = []
    Kmaxes = []
    for j in range(num_segments):
        F = readmda(firings_list[j])
        firings_arrays.append(F)
    for j in range(num_segments):
        F = firings_arrays[j]
        print(str(len(F[1, :])) + ' clustered events in segment ' + str(j))
        labels = F[2, :]
        if len(labels) == 0:
            Kmaxes.append(0)
        else:
            Kmaxes.append(np.max(labels))
    use_max = int(max(Kmaxes)) if Kmaxes else 0
    if use_max == 0:
        # No clustered events in any segment; return empty results
        return (np.zeros((0, 0, 0)), np.zeros((0, 0, 0)), np.zeros((0, 0, 0)),
                np.zeros((M, clip_size, 0, 0)), Kmaxes)
    dmatrix = np.ones((use_max, use_max, num_segments - 1)) * (-1)
    k1_dmatrix = np.ones((use_max, use_max, num_segments - 1)) * (-1)
    k2_dmatrix = np.ones((use_max, use_max, num_segments - 1)) * (-1)
    templates = np.zeros((M, clip_size, use_max, 2 * (num_segments - 1)))
    for j in range(num_segments - 1):
        print('Computing dmatrix between segments %d and %d' % (j, j + 1))
        if np.size(firings_arrays[j]) == 0 or np.size(
                firings_arrays[j + 1]) == 0:
            continue
        # Expects a compute_dmatrix variant that also returns the k1/k2
        # event-count matrices (five outputs)
        (dmatrix0, k1_dmatrix0, k2_dmatrix0, templates1,
         templates2) = compute_dmatrix(timeseries_list[j],
                                       timeseries_list[j + 1],
                                       firings_arrays[j],
                                       firings_arrays[j + 1],
                                       clip_size=clip_size)
        dmatrix[0:dmatrix0.shape[0], 0:dmatrix0.shape[1], j] = dmatrix0
        k1_dmatrix[0:dmatrix0.shape[0], 0:dmatrix0.shape[1], j] = k1_dmatrix0
        k2_dmatrix[0:dmatrix0.shape[0], 0:dmatrix0.shape[1], j] = k2_dmatrix0
        templates[:, :, 0:dmatrix0.shape[0], j * 2] = templates1
        templates[:, :, 0:dmatrix0.shape[1], j * 2 + 1] = templates2
    return (dmatrix, k1_dmatrix, k2_dmatrix, templates, Kmaxes)
def join_segments(*, timeseries_list, firings_list, dmatrix_out,
                  templates_out):
    """
    Join the results of spike sorting on a sequence of time segments to form a single firings file

    Parameters
    ----------
    timeseries_list : INPUT
        A list of paths of adjacent preprocessed timeseries segment files
    firings_list : INPUT
        A list of paths to corresponding firings files
    dmatrix_out : OUTPUT
        dmatrix for debugging
    templates_out : OUTPUT
        templates for debugging
    """
    X = DiskReadMda(timeseries_list[0])
    M = X.N1()
    clip_size = 100
    num_segments = len(timeseries_list)
    firings_arrays = []
    for j in range(num_segments):
        F = readmda(firings_list[j])
        firings_arrays.append(F)
    Kmax = 0
    for j in range(num_segments):
        F = firings_arrays[j]
        labels = F[2, :]
        Kmax = int(max(Kmax, np.max(labels)))
    dmatrix = np.ones((Kmax, Kmax, num_segments - 1)) * (-1)
    templates = np.zeros((M, clip_size, Kmax, 2 * (num_segments - 1)))
    for j in range(num_segments - 1):
        print('Computing dmatrix between segments %d and %d' % (j, j + 1))
        (dmatrix0, templates1,
         templates2) = compute_dmatrix(timeseries_list[j],
                                       timeseries_list[j + 1],
                                       firings_arrays[j],
                                       firings_arrays[j + 1],
                                       clip_size=clip_size)
        dmatrix[0:dmatrix0.shape[0], 0:dmatrix0.shape[1], j] = dmatrix0
        templates[:, :, 0:dmatrix0.shape[0], j * 2] = templates1
        templates[:, :, 0:dmatrix0.shape[1], j * 2 + 1] = templates2
    writemda64(templates, templates_out)
    return writemda64(dmatrix, dmatrix_out)
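
# A hypothetical end-to-end sketch for join_segments; the segment and firings
# paths below are placeholders, not files shipped with this module.
def _example_join_segments_usage():
    return join_segments(
        timeseries_list=['seg0.mda', 'seg1.mda', 'seg2.mda'],
        firings_list=['firings0.mda', 'firings1.mda', 'firings2.mda'],
        dmatrix_out='dmatrix.mda', templates_out='templates.mda')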
def normalize_channels(*, timeseries, timeseries_out):
    """
    Normalize the channels in a timeseries array to each have unit variance

    Parameters
    ----------
    timeseries : INPUT
        Path of timeseries, MxN where M is number of channels and N number of timepoints, in .mda format
    timeseries_out : OUTPUT
        Path of output timeseries in .mda format
    """
    X = DiskReadMda(timeseries)
    M, N = X.N1(), X.N2()
    _writer = DiskWriteMda(timeseries_out, [M, N], dt=X.dt())
    chunk_size_mb = 100

    # Two passes: the first accumulates per-channel sums and sums of squares,
    # the second normalizes each chunk and writes it out.
    normalize_channels._sums = np.zeros(M)
    normalize_channels._sumsqrs = np.zeros(M)

    def _kernel_compute_sumsqrs(chunk, info):
        normalize_channels._sums = normalize_channels._sums + np.sum(chunk,
                                                                     axis=1)
        normalize_channels._sumsqrs = normalize_channels._sumsqrs + np.sum(
            chunk**2, axis=1)
        return True

    def _kernel_normalize_and_write(chunk, info):
        means = normalize_channels._sums / N
        variances = (normalize_channels._sumsqrs -
                     normalize_channels._sums**2 / N) / (N - 1)
        stdevs = np.sqrt(variances)
        stdevs[np.where(stdevs == 0)] = 1  # avoid dividing by zero on flat channels
        means = np.reshape(means, (M, 1))
        stdevs = np.reshape(stdevs, (M, 1))
        chunk = (chunk - means) / stdevs  # broadcast over timepoints
        return _writer.writeChunk(chunk, i1=0, i2=info.t1)

    TCR = TimeseriesChunkReader(chunk_size_mb=chunk_size_mb, overlap_size=0)
    if not TCR.run(timeseries, _kernel_compute_sumsqrs):
        return False
    if not TCR.run(timeseries, _kernel_normalize_and_write):
        return False
    return True
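
# A small sanity sketch (on a synthetic array, no disk I/O) of the
# running-sums identity used by normalize_channels:
# var = (sum(x^2) - sum(x)^2 / N) / (N - 1), which matches numpy's unbiased
# per-channel variance.
def _example_normalize_identity():
    x = np.random.randn(4, 1000)
    n = x.shape[1]
    sums = np.sum(x, axis=1)
    sumsqrs = np.sum(x**2, axis=1)
    variances = (sumsqrs - sums**2 / n) / (n - 1)
    assert np.allclose(variances, np.var(x, axis=1, ddof=1))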
def compute_templates_helper(*, timeseries, firings, clip_size=100):
    X = DiskReadMda(timeseries)
    M, N = X.N1(), X.N2()
    F = readmda(firings)
    T = clip_size
    times = F[1, :]
    labels = F[2, :].astype(int)
    K = int(np.max(labels))
    # Accumulate per-label clip sums and counts across chunks, then divide
    compute_templates_helper._sums = np.zeros((M, T, K))
    compute_templates_helper._counts = np.zeros(K)

    def _kernel(chunk, info):
        inds = np.where((info.t1 <= times) & (times <= info.t2))[0]
        times0 = (times[inds] - info.t1 + info.t1a).astype(np.int32)
        labels0 = labels[inds]
        clips0 = np.zeros((M, clip_size, len(inds)), dtype=np.float32,
                          order='F')
        cpp.extract_clips(clips0, chunk, times0, clip_size)
        for k in range(1, K + 1):
            inds_kk = np.where(labels0 == k)[0]
            compute_templates_helper._sums[:, :, k - 1] += np.sum(
                clips0[:, :, inds_kk], axis=2)
            compute_templates_helper._counts[k - 1] += len(inds_kk)
        return True

    TCR = TimeseriesChunkReader(chunk_size_mb=40, overlap_size=clip_size * 2)
    if not TCR.run(timeseries, _kernel):
        return None
    templates = np.zeros((M, T, K))
    for k in range(1, K + 1):
        if compute_templates_helper._counts[k - 1]:
            templates[:, :, k - 1] = (
                compute_templates_helper._sums[:, :, k - 1] /
                compute_templates_helper._counts[k - 1])
    return templates
def run(self, mdafile_path_or_diskreadmda, func):
    if isinstance(mdafile_path_or_diskreadmda, str):
        X = DiskReadMda(mdafile_path_or_diskreadmda)
    else:
        X = mdafile_path_or_diskreadmda
    M, N = X.N1(), X.N2()
    cs = max([self._chunk_size,
              int(self._chunk_size_mb * 1e6 / (M * 4)), M])
    if self._t1 < 0:
        self._t1 = 0
    if self._t2 < 0:
        self._t2 = N - 1
    t = self._t1
    while t <= self._t2:
        t1 = t
        t2 = min(self._t2, t + cs - 1)
        # Read the chunk plus overlap on both sides
        s1 = max(0, t1 - self._overlap_size)
        s2 = min(N - 1, t2 + self._overlap_size)
        timer = time.time()
        chunk = X.readChunk(i1=0, N1=M, i2=s1, N2=s2 - s1 + 1)
        self._elapsed_reading += time.time() - timer
        info = TimeseriesChunkInfo()
        info.t1 = t1
        info.t2 = t2
        info.t1a = t1 - s1  # index of t1 within the chunk
        info.t2a = t2 - s1  # index of t2 within the chunk
        info.size = t2 - t1 + 1
        timer = time.time()
        if not func(chunk, info):
            return False
        self._elapsed_running += time.time() - timer
        t = t + cs
    if self._verbose:
        print('Elapsed for TimeseriesChunkReader: %g sec reading, %g sec running'
              % (self._elapsed_reading, self._elapsed_running))
    return True
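
# A minimal sketch of the kernel protocol expected by TimeseriesChunkReader.run:
# the kernel receives each chunk (including any overlap) together with an info
# object whose t1a..t2a columns map back to the non-overlapping range t1..t2,
# and returns False to abort the run. 'raw.mda' is a placeholder path.
def _example_chunk_kernel_usage():
    maxes = []

    def _kernel(chunk, info):
        # Restrict to the non-overlap region so each timepoint is seen once
        maxes.append(np.max(np.abs(chunk[:, info.t1a:info.t2a + 1])))
        return True

    TCR = TimeseriesChunkReader(chunk_size_mb=100, overlap_size=0)
    if not TCR.run('raw.mda', _kernel):
        return None
    return max(maxes)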
def extract_clips_helper(*, timeseries, times, clip_size=100, verbose=False):
    X = DiskReadMda(timeseries)
    M, N = X.N1(), X.N2()
    L = times.size
    T = clip_size
    extract_clips_helper._clips = np.zeros((M, T, L))

    def _kernel(chunk, info):
        inds = np.where((info.t1 <= times) & (times <= info.t2))[0]
        # Cast to int32 as expected by the cpp extract_clips kernel
        # (matching compute_templates_helper above)
        times0 = (times[inds] - info.t1 + info.t1a).astype(np.int32)
        clips0 = np.zeros((M, clip_size, len(inds)), dtype=np.float32,
                          order='F')
        cpp.extract_clips(clips0, chunk, times0, clip_size)
        extract_clips_helper._clips[:, :, inds] = clips0
        return True

    TCR = TimeseriesChunkReader(chunk_size_mb=100, overlap_size=clip_size * 2,
                                verbose=verbose)
    if not TCR.run(timeseries, _kernel):
        return None
    return extract_clips_helper._clips
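
# A hypothetical usage sketch for extract_clips_helper: pull clips around the
# event times of a firings file; 'filt.mda' and 'firings.mda' are placeholders.
def _example_extract_clips_usage():
    F = readmda('firings.mda')
    clips = extract_clips_helper(timeseries='filt.mda',
                                 times=F[1, :].ravel(), clip_size=100)
    return clips  # M x clip_size x L array, or None if the read failed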
def extract_timeseries(*, timeseries, channels_array='', timeseries_out,
                       channels='', t1=-1, t2=-1, timeseries_dtype='',
                       timeseries_num_channels=0):
    """
    Extract a chunk of a timeseries dataset and possibly a subset of channels

    Parameters
    ----------
    timeseries : INPUT
        Path of timeseries, MxN where M is number of channels and N number of timepoints, in either .mda or raw binary format. If raw binary, then you must supply dtype and num_channels.
    channels_array : INPUT
        Path of array of channel numbers (positive integers). Either use this or the channels parameter, not both.
    timeseries_out : OUTPUT
        Path of output timeseries in .mda format
    channels : string
        Comma-separated list of channels to extract. Either use this or the channels_array input, not both.
    t1 : integer
        Integer start timepoint (zero-based indexing). If -1 will set to zero.
    t2 : integer
        Integer end timepoint (zero-based indexing). If -1 will set to N-1.
    timeseries_dtype : string
        Only supply this if timeseries is in raw binary format. Choices are int16, uint16, int32, float32, etc.
    timeseries_num_channels : integer
        Only supply this if timeseries is in raw binary format. Integer representing number of channels. Number of timepoints will be deduced
    """
    if channels:
        _channels = np.fromstring(channels, dtype=int, sep=',')
    elif channels_array:
        _channels = readmda(channels_array).ravel()
    else:
        _channels = np.empty(0)

    header0 = None
    if timeseries_dtype:
        size_bytes = os.path.getsize(timeseries)
        num_bytes_per_entry = get_num_bytes_per_entry_from_dt(timeseries_dtype)
        if t2 >= 0:
            num_entries = (t2 + 1) * timeseries_num_channels
        else:
            num_entries = size_bytes // num_bytes_per_entry
            if num_entries % timeseries_num_channels != 0:
                print('File size (%ld) is not divisible by number of channels (%d) for dtype=%s'
                      % (size_bytes, timeseries_num_channels,
                         timeseries_dtype))
                return False
        num_timepoints = num_entries // timeseries_num_channels
        header0 = MdaHeader(timeseries_dtype,
                            [timeseries_num_channels, num_timepoints])

    X = DiskReadMda(timeseries, header0)
    M, N = X.N1(), X.N2()
    if _channels.size == 0:
        _channels = np.array(1 + np.arange(M))
    M2 = _channels.size
    if t1 < 0:
        t1 = 0
    if t2 < 0:
        t2 = N - 1
    N2 = t2 - t1 + 1
    _writer = DiskWriteMda(timeseries_out, [M2, N2], dt=X.dt())

    def _kernel(chunk, info):
        chunk = chunk[(_channels - 1).tolist(), ]  # channel numbers are 1-based
        return _writer.writeChunk(chunk, i1=0, i2=info.t1)

    chunk_size_mb = 100
    TCR = TimeseriesChunkReader(chunk_size_mb=chunk_size_mb, overlap_size=0,
                                t1=t1, t2=t2)
    return TCR.run(X, _kernel)
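
# A hypothetical usage sketch for extract_timeseries: keep the first four
# channels of the first minute of a 30 kHz .mda recording (placeholder paths).
def _example_extract_timeseries_usage():
    return extract_timeseries(timeseries='raw.mda', timeseries_out='sub.mda',
                              channels='1,2,3,4', t1=0, t2=30000 * 60 - 1)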
def reptrack(*, timeseries, firings_out, detect_threshold=3, detect_sign=0,
             section_size=60 * 30000, detect_interval=20, detect_channel=0):
    """
    Find representative spikes for the single "best" unit that stretches all the way through the dataset

    Parameters
    ----------
    timeseries : INPUT
        The preprocessed timeseries array
    firings_out : OUTPUT
        The firings file (for the single unit)
    detect_channel : int
        Channel for detection (1-based indexing) or 0 to detect on max over all channels
    detect_threshold : float
        Threshold for detection
    detect_sign : int
        Sign for the detection -1, 0, or 1
    section_size : int
        Size of each section (in timepoints)
    """
    X = DiskReadMda(timeseries)
    M = X.N1()
    N = X.N2()
    num_sections = int(np.floor(N / section_size))
    chunk_infos = []
    S = 3  # number of scores to track
    clips_prev = np.zeros(0)
    for ii in range(0, num_sections):
        # Read the current chunk
        chunk0 = X.readChunk(i1=0, i2=ii * section_size, N1=M,
                             N2=section_size)

        # Detect the events during this chunk and offset the times
        if detect_channel > 0:
            signal_for_detect = chunk0[detect_channel - 1, :]
        else:
            if detect_sign == 0:
                signal_for_detect = np.max(np.abs(chunk0), axis=0)
            elif detect_sign > 0:
                signal_for_detect = np.max(chunk0, axis=0)
            else:
                signal_for_detect = np.min(chunk0, axis=0)
        times0 = detect(signal_for_detect, detect_threshold, detect_sign,
                        detect_interval)
        times0 = times0 + ii * section_size
        L0 = len(times0)

        # Extract the clips for this chunk
        clips0 = extract_clips_helper(timeseries=timeseries, times=times0,
                                      clip_size=50)
        if ii == 0:
            # If this is the first chunk, initialize things
            scores0 = np.zeros((S, L0))
            connections0 = np.ones(L0) * -1
        else:
            # Some results from the previous chunk
            times_prev = chunk_infos[ii - 1]['times']
            scores_prev = chunk_infos[ii - 1]['scores']

            # Compute PCA features on the clips from this and the previous chunk combined
            clips_combined = np.concatenate((clips_prev, clips0), axis=2)
            features_combined = compute_clips_features(clips_combined,
                                                       num_features=10)
            features0 = features_combined[:, len(times_prev):]
            features_prev = features_combined[:, 0:len(times_prev)]

            # Compute the nearest neighbors (candidates for connections)
            nbrs = NearestNeighbors(n_neighbors=50, algorithm='ball_tree')
            nbrs.fit(features_prev.transpose())
            nearest_inds = nbrs.kneighbors(features0.transpose(),
                                           return_distance=False)

            # For each, find the best connection among the candidates
            scores0 = np.zeros((S, L0))
            connections0 = np.zeros(L0)
            maxmins_prev = scores_prev[0, :]
            averages_prev = scores_prev[1, :]
            for jj in range(len(times0)):
                tmp = features0[:, jj]
                nearest_inds_jj = nearest_inds[jj, :].tolist()
                dists = np.linalg.norm(features_prev[:, nearest_inds_jj] -
                                       tmp.reshape((len(tmp), 1)), axis=0)
                normalized_distances = dists / np.linalg.norm(tmp)
                maxmins = np.maximum(normalized_distances,
                                     maxmins_prev[nearest_inds_jj])
                averages = (normalized_distances +
                            averages_prev[nearest_inds_jj] * (ii + 1)) / (ii + 2)
                overall_scores = maxmins + averages * 0.1
                ind0 = np.argmin(overall_scores)
                scores0[0, jj] = maxmins[ind0]
                scores0[1, jj] = averages[ind0]
                scores0[2, jj] = overall_scores[ind0]
                connections0[jj] = nearest_inds_jj[ind0]
        clips_prev = clips0

        # Store the results for this chunk
        info0 = {
            'times': times0,
            'connections': connections0,
            'scores': scores0
        }
        chunk_infos.append(info0)

    # Trace every event in the last chunk back to its origin in the first chunk
    last_chunk_info = chunk_infos[len(chunk_infos) - 1]
    last_times = last_chunk_info['times']
    last_overall_scores = last_chunk_info['scores'][S - 1, :]
    last_to_first_connections = np.zeros(len(last_times))
    for kk in range(0, len(last_times)):
        ind0 = kk
        for ii in range(len(chunk_infos) - 2, -1, -1):
            ind0 = int(chunk_infos[ii + 1]['connections'][ind0])
        last_to_first_connections[kk] = ind0

    print('Unique:')
    unique1 = np.unique(last_to_first_connections)
    print(len(unique1))
    print(len(chunk_infos[0]['times']))

    # For each unique origin, walk back through the connections collecting
    # one representative spike per chunk
    rep_times = []
    rep_labels = []
    for aa in range(0, len(unique1)):
        bb = np.where(last_to_first_connections == unique1[aa])[0]
        cc = np.argmax(last_overall_scores[bb])
        ind0 = bb[cc]
        rep_times.append(last_chunk_info['times'][ind0])
        rep_labels.append(aa)
        for ii in range(len(chunk_infos) - 1, 0, -1):
            ind0 = int(chunk_infos[ii]['connections'][ind0])
            rep_times.append(chunk_infos[ii - 1]['times'][ind0])
            rep_labels.append(aa)

    firings = np.zeros((3, len(rep_times)))
    for jj in range(len(rep_times)):
        firings[1, jj] = rep_times[jj]
        firings[2, jj] = rep_labels[jj]
    return writemda64(firings, firings_out)
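
# A hypothetical usage sketch for reptrack: track one representative unit
# through the whole recording, detecting negative spikes at 4 standard
# deviations on the max over all channels (placeholder paths).
def _example_reptrack_usage():
    return reptrack(timeseries='filt.mda', firings_out='rep_firings.mda',
                    detect_threshold=4, detect_sign=-1, detect_channel=0)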
def get_dmatrix_templates(timeseries_list, firings_list):
    X = DiskReadMda(timeseries_list[0])
    M = X.N1()
    clip_size = 50
    num_segments = len(timeseries_list)
    # Get all possible segment pairs, ordered so that adjacent pairs come
    # first, then non-adjacent pairs
    segment_combos = np.array(list(it.combinations(range(num_segments), 2)))
    segment_combos = np.append(
        segment_combos[np.where(np.diff(segment_combos) == 1)[0], :],
        segment_combos[np.where(np.diff(segment_combos) > 1)[0], :],
        axis=0)
    num_combos = int(comb(num_segments, 2))
    firings_arrays = []
    Kmaxes = []
    for j in range(num_segments):
        F = readmda(firings_list[j])
        firings_arrays.append(F)
    for j in range(num_segments):
        F = firings_arrays[j]
        print(str(len(F[1, :])) + ' clustered events in segment ' + str(j))
        labels = F[2, :]
        if len(labels) == 0:
            Kmaxes.append(0)
        else:
            Kmaxes.append(np.max(labels))
    use_max = int(max(Kmaxes)) if Kmaxes else 0
    if use_max == 0:
        # No clustered events in any segment; return empty results
        return (np.zeros((0, 0, 0)), np.zeros((0, 0, 0)), np.zeros((0, 0, 0)),
                np.zeros((M, clip_size, 0, 0)), Kmaxes, segment_combos)
    dmatrix = np.ones((use_max, use_max, num_combos)) * (-1)
    k1_dmatrix = np.ones((use_max, use_max, num_combos)) * (-1)
    k2_dmatrix = np.ones((use_max, use_max, num_combos)) * (-1)
    templates = np.zeros((M, clip_size, use_max, 2 * num_combos))
    for n in range(num_combos):  # index into the third dimension of dmatrix
        j1 = segment_combos[n, 0]
        j2 = segment_combos[n, 1]
        print('Computing dmatrix between segments %d and %d' % (j1, j2))
        if np.size(firings_arrays[j1]) == 0 or np.size(
                firings_arrays[j2]) == 0:
            continue
        (dmatrix0, k1_dmatrix0, k2_dmatrix0, templates1,
         templates2) = compute_dmatrix(timeseries_list[j1],
                                       timeseries_list[j2],
                                       firings_arrays[j1],
                                       firings_arrays[j2],
                                       clip_size=clip_size)
        dmatrix[0:dmatrix0.shape[0], 0:dmatrix0.shape[1], n] = dmatrix0
        k1_dmatrix[0:dmatrix0.shape[0], 0:dmatrix0.shape[1], n] = k1_dmatrix0
        k2_dmatrix[0:dmatrix0.shape[0], 0:dmatrix0.shape[1], n] = k2_dmatrix0
        templates[:, :, 0:dmatrix0.shape[0], n * 2] = templates1
        templates[:, :, 0:dmatrix0.shape[1], n * 2 + 1] = templates2
    return (dmatrix, k1_dmatrix, k2_dmatrix, templates, Kmaxes,
            segment_combos)