def normalize_channels(*, timeseries, timeseries_out):
    """
    Normalize the channels in a timeseries array to each have zero mean and unit variance

    Parameters
    ----------
    timeseries : INPUT
        Path of timeseries, MxN where M is number of channels and N number of timepoints, in .mda format
    timeseries_out : OUTPUT
        Path of output timeseries in .mda format
    """
    X = DiskReadMda(timeseries)
    M, N = X.N1(), X.N2()
    _writer = DiskWriteMda(timeseries_out, [M, N], dt=X.dt())
    chunk_size_mb = 100

    # Accumulators for the first pass: per-channel sums and sums of squares
    normalize_channels._sums = np.zeros(M)
    normalize_channels._sumsqrs = np.zeros(M)

    def _kernel_compute_sumsqrs(chunk, info):
        normalize_channels._sums = normalize_channels._sums + np.sum(chunk, axis=1)
        normalize_channels._sumsqrs = normalize_channels._sumsqrs + np.sum(chunk**2, axis=1)
        return True

    def _kernel_normalize_and_write(chunk, info):
        Nchunk = chunk.shape[1]
        means = normalize_channels._sums / N
        variances = (normalize_channels._sumsqrs - normalize_channels._sums**2 / N) / (N - 1)
        stdevs = np.sqrt(variances)
        stdevs[np.where(stdevs == 0)] = 1  # avoid division by zero on flat channels
        means = np.reshape(means, (M, 1))
        stdevs = np.reshape(stdevs, (M, 1))
        chunk = (chunk - np.tile(means, (1, Nchunk))) / np.tile(stdevs, (1, Nchunk))
        return _writer.writeChunk(chunk, i1=0, i2=info.t1)

    # Two streaming passes: accumulate statistics, then normalize and write
    TCR = TimeseriesChunkReader(chunk_size_mb=chunk_size_mb, overlap_size=0)
    if not TCR.run(timeseries, _kernel_compute_sumsqrs):
        return False
    if not TCR.run(timeseries, _kernel_normalize_and_write):
        return False
    return True
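# Usage sketch for normalize_channels (the file names here are hypothetical,
# chosen only for illustration):
#
#   normalize_channels(timeseries='raw.mda', timeseries_out='raw_normalized.mda')
#
# After this, each channel of raw_normalized.mda has zero mean and unit
# variance, computed over the full recording in two streaming passes.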
def compute_templates_helper(*, timeseries, firings, clip_size=100):
    X = DiskReadMda(timeseries)
    M, N = X.N1(), X.N2()
    F = readmda(firings)
    T = clip_size
    times = F[1, :]
    labels = F[2, :].astype(int)
    K = np.max(labels)

    # Accumulators: per-cluster sums of clips and per-cluster event counts.
    # (Stored on this function itself, not on a sibling function, so the
    # helper is self-contained.)
    compute_templates_helper._sums = np.zeros((M, T, K))
    compute_templates_helper._counts = np.zeros(K)

    def _kernel(chunk, info):
        inds = np.where((info.t1 <= times) & (times <= info.t2))[0]
        # Shift event times into chunk-local coordinates (int32 for the C++ routine)
        times0 = (times[inds] - info.t1 + info.t1a).astype(np.int32)
        labels0 = labels[inds]
        clips0 = np.zeros((M, clip_size, len(inds)), dtype=np.float32, order='F')
        cpp.extract_clips(clips0, chunk, times0, clip_size)
        for k in range(1, K + 1):
            inds_kk = np.where(labels0 == k)[0]
            compute_templates_helper._sums[:, :, k - 1] += np.sum(clips0[:, :, inds_kk], axis=2)
            compute_templates_helper._counts[k - 1] += len(inds_kk)
        return True

    TCR = TimeseriesChunkReader(chunk_size_mb=40, overlap_size=clip_size * 2)
    if not TCR.run(timeseries, _kernel):
        return None

    # Average the accumulated clips per cluster to obtain the templates
    templates = np.zeros((M, T, K))
    for k in range(1, K + 1):
        if compute_templates_helper._counts[k - 1]:
            templates[:, :, k - 1] = compute_templates_helper._sums[:, :, k - 1] / compute_templates_helper._counts[k - 1]
    return templates
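# Usage sketch for compute_templates_helper (hypothetical file names; assumes
# the firings rows [channel, time, label] used above):
#
#   templates = compute_templates_helper(timeseries='raw.mda',
#                                        firings='firings.mda',
#                                        clip_size=100)
#   # templates has shape (M, clip_size, K): one average waveform per cluster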
def run(self, mdafile_path_or_diskreadmda, func):
    if isinstance(mdafile_path_or_diskreadmda, str):
        X = DiskReadMda(mdafile_path_or_diskreadmda)
    else:
        X = mdafile_path_or_diskreadmda
    M, N = X.N1(), X.N2()
    cs = max([self._chunk_size, int(self._chunk_size_mb * 1e6 / (M * 4)), M])
    if self._t1 < 0:
        self._t1 = 0
    if self._t2 < 0:
        self._t2 = N - 1
    t = self._t1
    while t <= self._t2:
        t1 = t
        t2 = min(self._t2, t + cs - 1)
        s1 = max(0, t1 - self._overlap_size)
        s2 = min(N - 1, t2 + self._overlap_size)
        timer = time.time()
        chunk = X.readChunk(i1=0, N1=M, i2=s1, N2=s2 - s1 + 1)
        self._elapsed_reading += time.time() - timer
        info = TimeseriesChunkInfo()
        info.t1 = t1
        info.t2 = t2
        info.t1a = t1 - s1
        info.t2a = t2 - s1
        info.size = t2 - t1 + 1
        timer = time.time()
        if not func(chunk, info):
            return False
        self._elapsed_running += time.time() - timer
        t = t + cs
    if self._verbose:
        print('Elapsed for TimeseriesChunkReader: %g sec reading, %g sec running' %
              (self._elapsed_reading, self._elapsed_running))
    return True
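# Usage sketch for TimeseriesChunkReader.run: a minimal kernel, assuming only
# the (chunk, info) signature and constructor arguments already used in this
# module; 'raw.mda' is a hypothetical path:
#
#   def _print_chunk_mean(chunk, info):
#       print('timepoints %d..%d: mean %g' % (info.t1, info.t2, np.mean(chunk)))
#       return True  # returning False aborts the run early
#
#   TCR = TimeseriesChunkReader(chunk_size_mb=100, overlap_size=0)
#   TCR.run('raw.mda', _print_chunk_mean)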
def extract_clips_helper(*, timeseries, times, clip_size=100, verbose=False):
    X = DiskReadMda(timeseries)
    M, N = X.N1(), X.N2()
    L = times.size
    T = clip_size
    extract_clips_helper._clips = np.zeros((M, T, L))

    def _kernel(chunk, info):
        inds = np.where((info.t1 <= times) & (times <= info.t2))[0]
        # Shift event times into chunk-local coordinates; cast to int32 for the
        # C++ routine, matching the kernel in compute_templates_helper
        times0 = (times[inds] - info.t1 + info.t1a).astype(np.int32)
        clips0 = np.zeros((M, clip_size, len(inds)), dtype=np.float32, order='F')
        cpp.extract_clips(clips0, chunk, times0, clip_size)
        extract_clips_helper._clips[:, :, inds] = clips0
        return True

    TCR = TimeseriesChunkReader(chunk_size_mb=100, overlap_size=clip_size * 2, verbose=verbose)
    if not TCR.run(timeseries, _kernel):
        return None
    return extract_clips_helper._clips
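# Usage sketch for extract_clips_helper (hypothetical file name; times are
# zero-based sample indices into the timeseries):
#
#   times = np.array([1000, 2500, 40000])
#   clips = extract_clips_helper(timeseries='raw.mda', times=times, clip_size=100)
#   # clips has shape (M, clip_size, len(times)): one waveform snippet per event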
def extract_timeseries(*,
                       timeseries,
                       channels_array='',
                       timeseries_out,
                       channels='',
                       t1=-1,
                       t2=-1,
                       timeseries_dtype='',
                       timeseries_num_channels=0):
    """
    Extract a chunk of a timeseries dataset and possibly a subset of channels

    Parameters
    ----------
    timeseries : INPUT
        Path of timeseries, MxN where M is number of channels and N number of timepoints, in either .mda or raw binary format. If raw binary, then you must supply dtype and num_channels.
    channels_array : INPUT
        Path of array of channel numbers (positive integers). Either use this or the channels parameter, not both.
    timeseries_out : OUTPUT
        Path of output timeseries in .mda format
    channels : string
        Comma-separated list of channels to extract. Either use this or the channels_array input, not both.
    t1 : integer
        Integer start timepoint (zero-based indexing). If -1 will set to zero.
    t2 : integer
        Integer end timepoint (zero-based indexing). If -1 will set to N-1.
    timeseries_dtype : string
        Only supply this if timeseries is in raw binary format. Choices are int16, uint16, int32, float32, etc.
    timeseries_num_channels : integer
        Only supply this if timeseries is in raw binary format. Integer representing number of channels. Number of timepoints will be deduced
    """
    if channels:
        _channels = np.fromstring(channels, dtype=int, sep=',')
    elif channels_array:
        _channels = readmda(channels_array).ravel()
    else:
        _channels = np.empty(0)

    header0 = None
    if timeseries_dtype:
        # Raw binary input: build an mda header from the supplied dtype and channel count
        size_bytes = os.path.getsize(timeseries)
        num_bytes_per_entry = get_num_bytes_per_entry_from_dt(timeseries_dtype)
        if t2 >= 0:
            num_entries = (t2 + 1) * timeseries_num_channels
        else:
            num_entries = size_bytes // num_bytes_per_entry
        if num_entries % timeseries_num_channels != 0:
            print("File size (%ld) is not divisible by number of channels (%d) for dtype=%s" %
                  (size_bytes, timeseries_num_channels, timeseries_dtype))
            return False
        num_timepoints = num_entries // timeseries_num_channels
        header0 = MdaHeader(timeseries_dtype, [timeseries_num_channels, num_timepoints])

    X = DiskReadMda(timeseries, header0)
    M, N = X.N1(), X.N2()
    if _channels.size == 0:
        _channels = np.array(1 + np.arange(M))  # default: all channels (1-based)
    M2 = _channels.size
    if t1 < 0:
        t1 = 0
    if t2 < 0:
        t2 = N - 1
    N2 = t2 - t1 + 1

    _writer = DiskWriteMda(timeseries_out, [M2, N2], dt=X.dt())

    def _kernel(chunk, info):
        chunk = chunk[(_channels - 1).tolist(), :]  # channel numbers are 1-based
        # Write at an offset relative to the extraction start so the output
        # stays within its [M2, N2] bounds when t1 > 0
        return _writer.writeChunk(chunk, i1=0, i2=info.t1 - t1)

    chunk_size_mb = 100
    TCR = TimeseriesChunkReader(chunk_size_mb=chunk_size_mb, overlap_size=0, t1=t1, t2=t2)
    return TCR.run(X, _kernel)
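# Usage sketch for extract_timeseries (hypothetical paths; pulls channels 1-3
# and the first 30000 timepoints from an .mda input):
#
#   extract_timeseries(timeseries='raw.mda', timeseries_out='raw_sub.mda',
#                      channels='1,2,3', t1=0, t2=29999)
#
# For raw binary input, also pass timeseries_dtype (e.g. 'int16') and
# timeseries_num_channels so the header can be deduced from the file size.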
def anneal_segments(*,
                    timeseries_list,
                    firings_list,
                    firings_out,
                    dmatrix_out='',
                    k1_dmatrix_out='',
                    k2_dmatrix_out='',
                    dmatrix_templates_out='',
                    time_offsets):
    """
    Combine a list of firings files to form a single firings file

    Link firings labels to first firings.mda, all other firings labels are incremented

    Parameters
    ----------
    timeseries_list : INPUT
        A list of paths of timeseries mda files to be used for drift adjustment / time offsets
    firings_list : INPUT
        A list of paths of firings mda files to be concatenated/drift adjusted
    firings_out : OUTPUT
        The output firings
    dmatrix_out : OUTPUT
        The distance matrix used
    k1_dmatrix_out : OUTPUT
        The mean distances of k1 templates to k1 spikes
    k2_dmatrix_out : OUTPUT
        The mean distances of k2 templates to k2 spikes
    dmatrix_templates_out : OUTPUT
        The templates used to compute the distance matrix
    time_offsets : string
        An array of time offsets for each firings file. Expect one offset for each firings file.
    """
    print('timeseries_list ' + str(timeseries_list))
    print('firings_list ' + str(firings_list))
    print('firings_out ' + str(firings_out))
    print('time_offsets ' + str(time_offsets))
    if time_offsets:
        time_offsets = np.fromstring(time_offsets, dtype=np.float64, sep=',')
    else:
        print('No time offsets provided - assuming zero time gap/continuously recorded data')
        time_offsets = np.zeros(len(timeseries_list))
        # Get offsets based on length of preceding timeseries - first one left as zero
        for timeseries in range(len(timeseries_list) - 1):
            X = DiskReadMda(timeseries_list[timeseries])
            time_offsets[timeseries + 1] = time_offsets[timeseries] + X.N2()

    concatenated_firings = concat_and_increment(firings_list, time_offsets)

    (dmatrix, k1_dmatrix, k2_dmatrix, templates, Kmaxes) = get_dmatrix_templates(timeseries_list, firings_list)
    dmatrix[np.isnan(dmatrix)] = -1  # set nans to -1 to avoid runtime error
    k1_dmatrix[dmatrix < 0] = np.nan  # replace all negative dist numbers (no comparison) with NaN
    k2_dmatrix[dmatrix < 0] = np.nan
    dmatrix[dmatrix < 0] = np.nan

    # TODO: Improve join function
    pairs_to_merge = get_join_matrix(dmatrix, k1_dmatrix, templates, Kmaxes)  # Returns with base 1 adjustment

    pairs_to_merge = np.reshape(pairs_to_merge, (-1, 2))
    pairs_to_merge = pairs_to_merge[~np.isnan(pairs_to_merge).any(axis=1)]  # Eliminate all rows with NaN
    pairs_to_merge = pairs_to_merge[np.argsort(pairs_to_merge[:, 0])]  # Assure that input is sorted

    # Propagate merge pairs to lowest label number
    for idx, label in enumerate(pairs_to_merge[:, 1]):
        pairs_to_merge[np.isin(pairs_to_merge[:, 0], label), 0] = pairs_to_merge[idx, 0]  # Input should be sorted

    # Merge firing labels
    for merge_pair in range(pairs_to_merge.shape[0]):
        concatenated_firings[2, np.isin(concatenated_firings[2, :],
                                        pairs_to_merge[merge_pair, 1])] = pairs_to_merge[merge_pair, 0]  # Already base 1 corrected

    writemda64(dmatrix, dmatrix_out)
    writemda32(templates, dmatrix_templates_out)
    writemda64(k1_dmatrix, k1_dmatrix_out)
    writemda64(k2_dmatrix, k2_dmatrix_out)
    # Write the merged, concatenated firings
    return writemda64(concatenated_firings, firings_out)
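# Usage sketch for anneal_segments (hypothetical paths; two segments recorded
# back-to-back, so no explicit time offsets are passed and the offsets are
# deduced from the timeseries lengths):
#
#   anneal_segments(timeseries_list=['seg1.mda', 'seg2.mda'],
#                   firings_list=['firings1.mda', 'firings2.mda'],
#                   firings_out='firings_combined.mda',
#                   time_offsets='')
#
# Passing e.g. time_offsets='0,1800000' instead would place the second
# segment's events 1800000 timepoints after the start of the first.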
def reptrack(*,
             timeseries,
             firings_out,
             detect_threshold=3,
             detect_sign=0,
             section_size=60 * 30000,
             detect_interval=20,
             detect_channel=0):
    """
    Find representative spikes for the single "best" unit that stretches all the way through the dataset

    Parameters
    ----------
    timeseries : INPUT
        The preprocessed timeseries array
    firings_out : OUTPUT
        The firings file (for the single unit)
    detect_channel : int
        Channel for detection (1-based indexing) or 0 to detect on max over all channels
    detect_threshold : float
        Threshold for detection
    detect_sign : int
        Sign for the detection -1, 0, or 1
    section_size : int
        Size of each section (in timepoints)
    """
    X = DiskReadMda(timeseries)
    M = X.N1()
    N = X.N2()
    num_sections = int(np.floor(N / section_size))
    chunk_infos = []
    S = 3  # number of scores to track
    clips_prev = np.zeros(0)
    for ii in range(0, num_sections):
        # Read the current chunk
        chunk0 = X.readChunk(i1=0, i2=ii * section_size, N1=M, N2=section_size)

        # Detect the events during this chunk and offset the times
        if detect_channel > 0:
            signal_for_detect = chunk0[detect_channel - 1, :]
        else:
            if detect_sign == 0:
                signal_for_detect = np.max(np.abs(chunk0), axis=0)
            elif detect_sign > 0:
                signal_for_detect = np.max(chunk0, axis=0)
            else:
                signal_for_detect = np.min(chunk0, axis=0)
        times0 = detect(signal_for_detect, detect_threshold, detect_sign, detect_interval)
        times0 = times0 + ii * section_size
        L0 = len(times0)

        # Extract the clips for this chunk
        clips0 = extract_clips_helper(timeseries=timeseries, times=times0, clip_size=50)
        if ii == 0:
            # If this is the first chunk, initialize things
            scores0 = np.zeros((S, L0))
            connections0 = np.ones(L0) * -1
        else:
            # Some results from the previous chunk
            times_prev = chunk_infos[ii - 1]['times']
            scores_prev = chunk_infos[ii - 1]['scores']

            # Compute PCA features on the clips from this and the previous chunk combined
            clips_combined = np.concatenate((clips_prev, clips0), axis=2)
            features_combined = compute_clips_features(clips_combined, num_features=10)
            features0 = features_combined[:, len(times_prev):]
            features_prev = features_combined[:, 0:len(times_prev)]

            # Compute the nearest neighbors (candidates for connections);
            # defensively cap n_neighbors at the number of stored events
            nbrs = NearestNeighbors(n_neighbors=min(50, len(times_prev)), algorithm='ball_tree')
            nbrs.fit(features_prev.transpose())
            nearest_inds = nbrs.kneighbors(features0.transpose(), return_distance=False)

            # For each, find the best connection among the candidates
            scores0 = np.zeros((S, L0))
            connections0 = np.zeros(L0)
            maxmins_prev = scores_prev[0, :]
            averages_prev = scores_prev[1, :]
            for jj in range(len(times0)):
                tmp = features0[:, jj]
                nearest_inds_jj = nearest_inds[jj, :].tolist()
                dists = np.linalg.norm(features_prev[:, nearest_inds_jj] - tmp.reshape((len(tmp), 1)), axis=0)
                normalized_distances = dists / np.linalg.norm(tmp)
                # Track both the worst link (maxmin) and the running average distance
                maxmins = np.maximum(normalized_distances, maxmins_prev[nearest_inds_jj])
                averages = (normalized_distances + averages_prev[nearest_inds_jj] * (ii + 1)) / (ii + 2)
                overall_scores = maxmins + averages * 0.1
                ind0 = np.argmin(overall_scores)
                scores0[0, jj] = maxmins[ind0]
                scores0[1, jj] = averages[ind0]
                scores0[2, jj] = overall_scores[ind0]
                connections0[jj] = nearest_inds_jj[ind0]
        clips_prev = clips0

        # Store the results for this chunk
        info0 = {'times': times0, 'connections': connections0, 'scores': scores0}
        chunk_infos.append(info0)

    last_chunk_info = chunk_infos[len(chunk_infos) - 1]
    last_times = last_chunk_info['times']
    last_overall_scores = last_chunk_info['scores'][S - 1, :]

    # Trace every event in the last chunk back to its ancestor in the first chunk
    last_to_first_connections = np.zeros(len(last_times))
    for kk in range(0, len(last_times)):
        ind0 = kk
        for ii in range(len(chunk_infos) - 2, -1, -1):
            ind0 = int(chunk_infos[ii + 1]['connections'][ind0])
        last_to_first_connections[kk] = ind0

    print('Unique:')
    unique1 = np.unique(last_to_first_connections)
    print(len(unique1))
    print(len(chunk_infos[0]['times']))

    # For each first-chunk ancestor, pick a representative chain of events
    rep_times = []
    rep_labels = []
    for aa in range(0, len(unique1)):
        bb = np.where(last_to_first_connections == unique1[aa])[0]
        cc = np.argmax(last_overall_scores[bb])
        ind0 = bb[cc]
        rep_times.append(last_chunk_info['times'][ind0])
        rep_labels.append(aa)
        for ii in range(len(chunk_infos) - 1, 0, -1):
            ind0 = int(chunk_infos[ii]['connections'][ind0])
            rep_times.append(chunk_infos[ii - 1]['times'][ind0])
            rep_labels.append(aa)

    # ind0=np.argmin(last_chunk_info['scores'][S-1,:]) #Overall score is in row S-1
    # rep_times[len(chunk_infos)-1]=last_chunk_info['times'][ind0]
    # for ii in range(len(chunk_infos)-1,0,-1):
    #     ind0=int(chunk_infos[ii]['connections'][ind0])
    #     rep_times[ii-1]=chunk_infos[ii-1]['times'][ind0]

    firings = np.zeros((3, len(rep_times)))
    for jj in range(len(rep_times)):
        firings[1, jj] = rep_times[jj]
        firings[2, jj] = rep_labels[jj]
    return writemda64(firings, firings_out)
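# Usage sketch for reptrack (hypothetical paths; detects on the max over all
# channels and writes firings for the tracked representative unit):
#
#   reptrack(timeseries='raw_normalized.mda', firings_out='reptrack_firings.mda',
#            detect_threshold=3, detect_sign=0, detect_channel=0)
#
# The output firings rows follow the same [channel, time, label] convention as
# the other processors in this module (the channel row is left as zero).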