Example #1
def normalize_channels(*, timeseries, timeseries_out):
    """
    Normalize the channels in a timeseries array to each have unit variance

    Parameters
    ----------
    timeseries : INPUT
        Path of timeseries, MxN where M is number of channels and N number of timepoints, in .mda format
        
    timeseries_out : OUTPUT
        Path of output timeseries in .mda format            
    """

    X = DiskReadMda(timeseries)
    M, N = X.N1(), X.N2()
    _writer = DiskWriteMda(timeseries_out, [M, N], dt=X.dt())

    chunk_size_mb = 100
    # Running per-channel sums and sums of squares, stored as function
    # attributes so the chunk kernels below can update them
    normalize_channels._sums = np.zeros(M)
    normalize_channels._sumsqrs = np.zeros(M)

    def _kernel_compute_sumsqrs(chunk, info):
        # First pass: accumulate the sums and sums of squares
        normalize_channels._sums += np.sum(chunk, axis=1)
        normalize_channels._sumsqrs += np.sum(chunk ** 2, axis=1)
        return True

    def _kernel_normalize_and_write(chunk, info):
        # Second pass: subtract the mean and divide by the (unbiased) stdev
        Nchunk = chunk.shape[1]
        means = normalize_channels._sums / N
        variances = (normalize_channels._sumsqrs -
                     normalize_channels._sums ** 2 / N) / (N - 1)
        stdevs = np.sqrt(variances)
        stdevs[stdevs == 0] = 1  # leave flat channels untouched
        means = np.reshape(means, (M, 1))
        stdevs = np.reshape(stdevs, (M, 1))
        chunk = (chunk - np.tile(means, (1, Nchunk))) / np.tile(
            stdevs, (1, Nchunk))
        return _writer.writeChunk(chunk, i1=0, i2=info.t1)

    TCR = TimeseriesChunkReader(chunk_size_mb=chunk_size_mb, overlap_size=0)
    if not TCR.run(timeseries, _kernel_compute_sumsqrs):
        return False
    if not TCR.run(timeseries, _kernel_normalize_and_write):
        return False
    return True
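
A minimal self-contained sketch of the same two-pass normalization on an in-memory array, plain NumPy only (the DiskReadMda/DiskWriteMda/TimeseriesChunkReader helpers above are assumed to come from the surrounding package, e.g. mountainlab_pytools-style mdaio):

import numpy as np

def normalize_channels_dense(X, chunk=1000):
    # Pass 1: accumulate per-channel sums and sums of squares, chunk by chunk
    M, N = X.shape
    sums = np.zeros(M)
    sumsqrs = np.zeros(M)
    for j in range(0, N, chunk):
        part = X[:, j:j + chunk]
        sums += np.sum(part, axis=1)
        sumsqrs += np.sum(part ** 2, axis=1)
    means = sums / N
    variances = (sumsqrs - sums ** 2 / N) / (N - 1)  # unbiased estimator
    stdevs = np.sqrt(variances)
    stdevs[stdevs == 0] = 1  # leave flat channels untouched
    # Pass 2: normalize (broadcasting replaces the np.tile calls above)
    return (X - means[:, None]) / stdevs[:, None]

X = np.vstack([np.random.randn(5000) * s for s in (1.0, 2.0, 4.0)])
print(np.std(normalize_channels_dense(X), axis=1, ddof=1))  # ~1.0 each
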
Example #2
def compute_templates_helper(*, timeseries, firings, clip_size=100):
    X = DiskReadMda(timeseries)
    M, N = X.N1(), X.N2()
    F = readmda(firings)
    T = clip_size
    times = F[1, :]
    labels = F[2, :].astype(int)
    K = np.max(labels)
    # Running sums and counts as function attributes so the chunk kernel can
    # update them (the original attached these to the undefined name
    # compute_templates; they must live on compute_templates_helper)
    compute_templates_helper._sums = np.zeros((M, T, K))
    compute_templates_helper._counts = np.zeros(K)

    def _kernel(chunk, info):
        # Events whose global times fall in this chunk's un-padded window
        inds = np.where((info.t1 <= times) & (times <= info.t2))[0]
        times0 = (times[inds] - info.t1 + info.t1a).astype(np.int32)
        labels0 = labels[inds]

        clips0 = np.zeros((M, clip_size, len(inds)),
                          dtype=np.float32,
                          order='F')
        cpp.extract_clips(clips0, chunk, times0, clip_size)

        for k in range(1, K + 1):
            inds_kk = np.where(labels0 == k)[0]
            compute_templates_helper._sums[:, :, k - 1] += np.sum(
                clips0[:, :, inds_kk], axis=2)
            compute_templates_helper._counts[k - 1] += len(inds_kk)
        return True

    TCR = TimeseriesChunkReader(chunk_size_mb=40, overlap_size=clip_size * 2)
    if not TCR.run(timeseries, _kernel):
        return None
    templates = np.zeros((M, T, K))
    for k in range(1, K + 1):
        if compute_templates_helper._counts[k - 1]:
            templates[:, :, k - 1] = (
                compute_templates_helper._sums[:, :, k - 1] /
                compute_templates_helper._counts[k - 1])
    return templates
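
For reference, the same template averaging on an in-memory array, with a NumPy loop standing in for the compiled cpp.extract_clips (the exact clip-centering convention here is an assumption):

import numpy as np

def compute_templates_dense(X, times, labels, clip_size=100):
    M, N = X.shape
    K = int(np.max(labels))
    Tmid = int((clip_size + 1) / 2) - 1  # assumed centering of each clip
    sums = np.zeros((M, clip_size, K))
    counts = np.zeros(K)
    for t, k in zip(times.astype(int), labels.astype(int)):
        a = t - Tmid
        if a < 0 or a + clip_size > N:
            continue  # skip events too close to the edges
        sums[:, :, k - 1] += X[:, a:a + clip_size]
        counts[k - 1] += 1
    templates = np.zeros((M, clip_size, K))
    for k in range(K):
        if counts[k]:
            templates[:, :, k] = sums[:, :, k] / counts[k]
    return templates
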
Example #3
    def run(self, mdafile_path_or_diskreadmda, func):
        if isinstance(mdafile_path_or_diskreadmda, str):
            X = DiskReadMda(mdafile_path_or_diskreadmda)
        else:
            X = mdafile_path_or_diskreadmda
        M, N = X.N1(), X.N2()
        # Chunk length in timepoints: honor the explicit chunk size, the
        # requested size in MB (assuming 4-byte entries), and at least M
        cs = max(
            [self._chunk_size,
             int(self._chunk_size_mb * 1e6 / (M * 4)), M])
        if self._t1 < 0:
            self._t1 = 0
        if self._t2 < 0:
            self._t2 = N - 1
        t = self._t1
        while t <= self._t2:
            t1 = t
            t2 = min(self._t2, t + cs - 1)
            s1 = max(0, t1 - self._overlap_size)  # padded read start
            s2 = min(N - 1, t2 + self._overlap_size)  # padded read end

            timer = time.time()
            chunk = X.readChunk(i1=0, N1=M, i2=s1, N2=s2 - s1 + 1)
            self._elapsed_reading += time.time() - timer

            info = TimeseriesChunkInfo()
            info.t1 = t1  # un-padded window, in global indices
            info.t2 = t2
            info.t1a = t1 - s1  # same window, relative to the padded chunk
            info.t2a = t2 - s1
            info.size = t2 - t1 + 1

            timer = time.time()
            if not func(chunk, info):
                return False
            self._elapsed_running += time.time() - timer

            t = t + cs
        if self._verbose:
            print(
                'Elapsed for TimeseriesChunkReader: %g sec reading, %g sec running'
                % (self._elapsed_reading, self._elapsed_running))
        return True
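
The window bookkeeping is easiest to see on toy numbers; the loop below reproduces the same index formulas without any I/O (N=25 timepoints, chunks of 10, overlap 3):

N, cs, overlap = 25, 10, 3
t = 0
while t <= N - 1:
    t1, t2 = t, min(N - 1, t + cs - 1)  # un-padded window
    s1 = max(0, t1 - overlap)           # padded read start
    s2 = min(N - 1, t2 + overlap)       # padded read end
    # t1a/t2a locate the un-padded window inside the padded chunk
    print(dict(t1=t1, t2=t2, s1=s1, s2=s2, t1a=t1 - s1, t2a=t2 - s1))
    t += cs
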
Example #4
def extract_clips_helper(*, timeseries, times, clip_size=100, verbose=False):
    X = DiskReadMda(timeseries)
    M, N = X.N1(), X.N2()
    L = times.size
    T = clip_size
    extract_clips_helper._clips = np.zeros((M, T, L))

    def _kernel(chunk, info):
        inds = np.where((info.t1 <= times) & (times <= info.t2))[0]
        # Convert global event times to indices into the padded chunk
        # (cast to int32 as in compute_templates_helper)
        times0 = (times[inds] - info.t1 + info.t1a).astype(np.int32)
        clips0 = np.zeros((M, clip_size, len(inds)),
                          dtype=np.float32,
                          order='F')
        cpp.extract_clips(clips0, chunk, times0, clip_size)

        extract_clips_helper._clips[:, :, inds] = clips0
        return True

    TCR = TimeseriesChunkReader(chunk_size_mb=100,
                                overlap_size=clip_size * 2,
                                verbose=verbose)
    if not TCR.run(timeseries, _kernel):
        return None
    return extract_clips_helper._clips
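
A plain-NumPy stand-in for the compiled cpp.extract_clips call used above (the centering convention is an assumption; clips that would run past the chunk edges are left as zeros):

import numpy as np

def extract_clips_dense(chunk, times0, clip_size):
    M, Nchunk = chunk.shape
    Tmid = int((clip_size + 1) / 2) - 1  # assumed centering of each clip
    clips = np.zeros((M, clip_size, len(times0)), dtype=np.float32, order='F')
    for j, t in enumerate(np.asarray(times0, dtype=int)):
        a = t - Tmid
        if 0 <= a and a + clip_size <= Nchunk:
            clips[:, :, j] = chunk[:, a:a + clip_size]
    return clips
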
Example #5
def extract_timeseries(*,
                       timeseries,
                       channels_array='',
                       timeseries_out,
                       channels='',
                       t1=-1,
                       t2=-1,
                       timeseries_dtype='',
                       timeseries_num_channels=0):
    """
    Extract a chunk of a timeseries dataset and possibly a subset of channels

    Parameters
    ----------
    timeseries : INPUT
        Path of timeseries, MxN where M is number of channels and N number of timepoints, in either .mda or raw binary format. If raw binary, then you must supply dtype and num_channels.
    channels_array : INPUT 
        Path of array of channel numbers (positive integers). Either use this or the channels parameter, not both.
        
    timeseries_out : OUTPUT
        Path of output timeseries in .mda format    
        
    channels : string
        Comma-separated list of channels to extract. Either use this or the channels_array input, not both.
    t1 : integer
        Integer start timepoint (zero-based indexing). If -1, it will be set to 0.
    t2 : integer
        Integer end timepoint (zero-based indexing). If -1, it will be set to N-1.
    timeseries_dtype : string
        Only supply this if timeseries is in raw binary format. Choices are int16, uint16, int32, float32, etc.
    timeseries_num_channels : integer
        Only supply this if timeseries is in raw binary format. Integer representing number of channels. Number of timepoints will be deduced from the file size.
    """
    if channels:
        # np.fromstring(..., sep=',') is deprecated; parse the list directly
        _channels = np.array([int(c) for c in channels.split(',')])
    elif channels_array:
        _channels = readmda(channels_array).ravel()
    else:
        _channels = np.empty(0)

    header0 = None
    if timeseries_dtype:
        size_bytes = os.path.getsize(timeseries)
        num_bytes_per_entry = get_num_bytes_per_entry_from_dt(timeseries_dtype)
        if t2 >= 0:
            num_entries = (t2 + 1) * timeseries_num_channels
        else:
            num_entries = size_bytes // num_bytes_per_entry
            if num_entries % timeseries_num_channels != 0:
                print(
                    "File size (%ld) is not divisible by number of channels (%g) for dtype=%s"
                    % (size_bytes, timeseries_num_channels, timeseries_dtype))
                return False
        # Use integer division so the MDA header gets integer dimensions
        num_timepoints = num_entries // timeseries_num_channels
        header0 = MdaHeader(timeseries_dtype,
                            [timeseries_num_channels, num_timepoints])

    X = DiskReadMda(timeseries, header0)
    M, N = X.N1(), X.N2()
    if (_channels.size == 0):
        _channels = np.array(1 + np.arange(M))
    M2 = _channels.size

    if (t1 < 0):
        t1 = 0
    if (t2 < 0):
        t2 = N - 1

    N2 = t2 - t1 + 1

    _writer = DiskWriteMda(timeseries_out, [M2, N2], dt=X.dt())

    def _kernel(chunk, info):
        # Select the requested channels (1-based labels -> 0-based rows)
        chunk = chunk[_channels.astype(int) - 1, :]
        # Offset into the output, which starts at timepoint t1
        return _writer.writeChunk(chunk, i1=0, i2=info.t1 - t1)

    chunk_size_mb = 100
    TCR = TimeseriesChunkReader(chunk_size_mb=chunk_size_mb,
                                overlap_size=0,
                                t1=t1,
                                t2=t2)
    return TCR.run(X, _kernel)
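
The raw-binary branch deduces the number of timepoints from the file size; a quick check of that arithmetic for a hypothetical int16 recording with 32 channels:

size_bytes = 192_000_000
num_bytes_per_entry = 2                 # int16
num_channels = 32
num_entries = size_bytes // num_bytes_per_entry
assert num_entries % num_channels == 0  # otherwise the dims are wrong
print(num_entries // num_channels)      # 3000000 timepoints
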
Example #6
def anneal_segments(*,
                    timeseries_list,
                    firings_list,
                    firings_out,
                    dmatrix_out='',
                    k1_dmatrix_out='',
                    k2_dmatrix_out='',
                    dmatrix_templates_out='',
                    time_offsets):
    """
    Combine a list of firings files to form a single firings file.
    Labels are linked to the first firings.mda; all other firings labels are incremented.

    Parameters
    ----------
    timeseries_list : INPUT
        A list of paths of timeseries mda files to be used for drift adjustment / time offsets
    firings_list : INPUT
        A list of paths of firings mda files to be concatenated/drift adjusted
    firings_out : OUTPUT
        The output firings
    dmatrix_out : OUTPUT
        The distance matrix used
    k1_dmatrix_out : OUTPUT
        The mean distances of k1 templates to k1 spikes
    k2_dmatrix_out : OUTPUT
        The mean distances of k2 templates to k2 spikes
    dmatrix_templates_out : OUTPUT
        The templates used to compute the distance matrix
        ...
        

    time_offsets : string
        Comma-separated list of time offsets, one for each firings file.
        ...
    """
    print('timeseries_list: ' + str(timeseries_list))
    print('firings_list: ' + str(firings_list))
    print('firings_out: ' + str(firings_out))
    print('time_offsets: ' + str(time_offsets))
    if time_offsets:
        # np.fromstring/np.float_ are deprecated; parse the list explicitly
        time_offsets = np.array([float(x) for x in time_offsets.split(',')])
    else:
        print(
            'No time offsets provided - assuming zero time gap/continuously recorded data'
        )
        time_offsets = np.zeros(len(timeseries_list))
        # Offsets from the length of each preceding timeseries - the first stays zero
        for ii in range(len(timeseries_list) - 1):
            X = DiskReadMda(timeseries_list[ii])
            time_offsets[ii + 1] = time_offsets[ii] + X.N2()

    concatenated_firings = concat_and_increment(firings_list, time_offsets)

    (dmatrix, k1_dmatrix, k2_dmatrix, templates,
     Kmaxes) = get_dmatrix_templates(timeseries_list, firings_list)
    dmatrix[np.isnan(dmatrix)] = -1  # set NaNs to -1 to avoid a runtime error
    # Replace all negative distances (no comparison made) with NaN
    k1_dmatrix[dmatrix < 0] = np.nan
    k2_dmatrix[dmatrix < 0] = np.nan
    dmatrix[dmatrix < 0] = np.nan

    #TODO: Improve join function
    pairs_to_merge = get_join_matrix(dmatrix, k1_dmatrix, templates,
                                     Kmaxes)  # Returns with base 1 adjustment

    pairs_to_merge = np.reshape(pairs_to_merge, (-1, 2))
    pairs_to_merge = pairs_to_merge[
        ~np.isnan(pairs_to_merge).any(axis=1)]  # eliminate all rows with NaN
    pairs_to_merge = pairs_to_merge[
        np.argsort(pairs_to_merge[:, 0])]  # ensure the input is sorted

    # Propagate merge pairs to the lowest label number (input is sorted)
    for idx, label in enumerate(pairs_to_merge[:, 1]):
        pairs_to_merge[np.isin(pairs_to_merge[:, 0], label),
                       0] = pairs_to_merge[idx, 0]

    # Merge firing labels (already base-1 corrected)
    for merge_pair in range(pairs_to_merge.shape[0]):
        mask = np.isin(concatenated_firings[2, :],
                       pairs_to_merge[merge_pair, 1])
        concatenated_firings[2, mask] = pairs_to_merge[merge_pair, 0]

    writemda64(dmatrix, dmatrix_out)
    writemda32(templates, dmatrix_templates_out)
    writemda64(k1_dmatrix, k1_dmatrix_out)
    writemda64(k2_dmatrix, k2_dmatrix_out)

    #Write
    return writemda64(concatenated_firings, firings_out)
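
A toy check of the merge-pair propagation and relabeling above (pairs are (keep, drop) rows sorted by the first column; chained pairs collapse to the lowest label):

import numpy as np

pairs = np.array([[1., 4.], [2., 5.], [4., 7.]])  # 7 -> 4 -> 1, 5 -> 2
for idx, label in enumerate(pairs[:, 1]):
    pairs[np.isin(pairs[:, 0], label), 0] = pairs[idx, 0]
labels = np.array([1, 4, 5, 7, 3])
for keep, drop in pairs:
    labels[np.isin(labels, drop)] = keep
print(labels)  # [1 1 2 1 3]
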
Example #7
def reptrack(*,
             timeseries,
             firings_out,
             detect_threshold=3,
             detect_sign=0,
             section_size=60 * 30000,
             detect_interval=20,
             detect_channel=0):
    """
    Find representative spikes for the single "best" unit that stretches all the way through the dataset

    Parameters
    ----------
    timeseries : INPUT
        The preprocessed timeseries array
    firings_out : OUTPUT
        The firings file (for the single unit)

    detect_channel : int
        Channel for detection (1-based indexing) or 0 to detect on max over all channels
    detect_threshold : float
        Threshold for detection
    detect_sign : int
        Sign for the detection -1, 0, or 1
    section_size : int
        Size of each section (in timepoints)
    """

    X = DiskReadMda(timeseries)
    M = X.N1()
    N = X.N2()
    num_sections = int(np.floor(N / section_size))
    chunk_infos = []

    S = 3  #number of scores to track

    clips_prev = np.zeros(0)
    for ii in range(0, num_sections):
        # Read the current chunk
        chunk0 = X.readChunk(i1=0, i2=ii * section_size, N1=M, N2=section_size)

        # Detect the events during this chunk and offset the times
        if (detect_channel > 0):
            signal_for_detect = chunk0[detect_channel - 1, :]
        else:
            if detect_sign == 0:
                signal_for_detect = np.max(np.abs(chunk0), axis=0)
            elif detect_sign > 0:
                signal_for_detect = np.max(chunk0, axis=0)
            else:
                signal_for_detect = np.min(chunk0, axis=0)
        times0 = detect(signal_for_detect, detect_threshold, detect_sign,
                        detect_interval)
        times0 = times0 + ii * section_size
        L0 = len(times0)

        # Extract the clips for this chunk
        clips0 = extract_clips_helper(timeseries=timeseries,
                                      times=times0,
                                      clip_size=50)
        if ii == 0:
            # If this is the first chunk, initialize things
            scores0 = np.zeros((S, L0))
            connections0 = np.ones(L0) * -1
        else:
            # Some results from the previous chunk
            times_prev = chunk_infos[ii - 1]['times']
            scores_prev = chunk_infos[ii - 1]['scores']

            # Compute PCA features on the clips from this and the previous chunk combined
            clips_combined = np.concatenate((clips_prev, clips0), axis=2)
            features_combined = compute_clips_features(clips_combined,
                                                       num_features=10)
            features0 = features_combined[:, len(times_prev):]
            features_prev = features_combined[:, 0:len(times_prev)]

            # Compute the nearest neighbors (candidates for connections)
            nbrs = NearestNeighbors(n_neighbors=50, algorithm='ball_tree')
            nbrs.fit(features_prev.transpose())
            nearest_inds = nbrs.kneighbors(features0.transpose(),
                                           return_distance=False)

            # For each, find the best connection among the candidates
            scores0 = np.zeros((S, L0))
            connections0 = np.zeros(L0)
            maxmins_prev = scores_prev[0, :]
            averages_prev = scores_prev[1, :]
            for jj in range(len(times0)):
                tmp = features0[:, jj]
                nearest_inds_jj = nearest_inds[jj, :].tolist()
                dists = np.linalg.norm(features_prev[:, nearest_inds_jj] -
                                       tmp.reshape((len(tmp), 1)),
                                       axis=0)
                normalized_distances = dists / np.linalg.norm(tmp)
                maxmins = np.maximum(normalized_distances,
                                     maxmins_prev[nearest_inds_jj])
                averages = (normalized_distances +
                            averages_prev[nearest_inds_jj] *
                            (ii + 1)) / (ii + 2)
                overall_scores = maxmins + averages * 0.1
                ind0 = np.argmin(overall_scores)
                scores0[0, jj] = maxmins[ind0]
                scores0[1, jj] = averages[ind0]
                scores0[2, jj] = overall_scores[ind0]
                connections0[jj] = nearest_inds_jj[ind0]

        clips_prev = clips0

        # Store the results for this chunk
        info0 = {
            'times': times0,
            'connections': connections0,
            'scores': scores0
        }
        chunk_infos.append(info0)

    last_chunk_info = chunk_infos[-1]

    last_times = last_chunk_info['times']
    last_overall_scores = last_chunk_info['scores'][S - 1, :]
    last_to_first_connections = np.zeros(len(last_times))
    for kk in range(0, len(last_times)):
        ind0 = kk
        for ii in range(len(chunk_infos) - 2, -1, -1):
            ind0 = int(chunk_infos[ii + 1]['connections'][ind0])
        last_to_first_connections[kk] = ind0

    unique1 = np.unique(last_to_first_connections)
    print('Unique first-chunk connections: %d (of %d first-chunk events)' %
          (len(unique1), len(chunk_infos[0]['times'])))

    rep_times = []
    rep_labels = []
    for aa in range(0, len(unique1)):
        bb = np.where(last_to_first_connections == unique1[aa])[0]
        cc = np.argmax(last_overall_scores[bb])
        ind0 = bb[cc]
        rep_times.append(last_chunk_info['times'][ind0])
        rep_labels.append(aa)
        for ii in range(len(chunk_infos) - 1, 0, -1):
            ind0 = int(chunk_infos[ii]['connections'][ind0])
            rep_times.append(chunk_infos[ii - 1]['times'][ind0])
            rep_labels.append(aa)

    #ind0=np.argmin(last_chunk_info['scores'][S-1,:]) #Overall score is in row S-1
    #rep_times[len(chunk_infos)-1]=last_chunk_info['times'][ind0]
    #for ii in range(len(chunk_infos)-1,0,-1):
    #    ind0=int(chunk_infos[ii]['connections'][ind0])
    #    rep_times[ii-1]=chunk_infos[ii-1]['times'][ind0]

    # Firings array rows: 0 = channel (unused here), 1 = time, 2 = label
    firings = np.zeros((3, len(rep_times)))
    for jj in range(len(rep_times)):
        firings[1, jj] = rep_times[jj]
        firings[2, jj] = rep_labels[jj]
    return writemda64(firings, firings_out)
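
The backtracking step is the heart of reptrack: each chunk stores, for every event, the index of its best-matching event in the previous chunk, and walking those links from the last chunk back to the first recovers one representative spike per chunk. A toy walk-through with hypothetical chunk_infos:

chunk_infos = [
    {'times': [10, 40], 'connections': [-1, -1]},  # first chunk: no links
    {'times': [110, 150], 'connections': [1, 0]},  # links into chunk 0
    {'times': [210, 260], 'connections': [0, 1]},  # links into chunk 1
]
ind0 = 1  # start from an event in the last chunk
chain = [chunk_infos[-1]['times'][ind0]]
for ii in range(len(chunk_infos) - 1, 0, -1):
    ind0 = int(chunk_infos[ii]['connections'][ind0])
    chain.append(chunk_infos[ii - 1]['times'][ind0])
print(chain[::-1])  # [10, 150, 260]
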