Example #1
 def _mapping_from_regions_list(self, new_map, lateralize=False):
     """
     From a vector of regions id, creates a mapping such as
     newids = self.mapping
     :param new_map: np.array: vector of regions id
     """
     I_ROOT = 1
     I_VOID = 0
     # to lateralize we make sure all regions are represented in + and -
     new_map = np.unique(np.r_[-new_map, new_map])
     assert np.all(np.isin(new_map, self.id)), \
         "All mapping ids should be represented in the Allen ids"
     # with the lateralization, self.id may have duplicate values so ismember is necessary
     iid, inm = ismember(self.id, new_map)
     iid = np.where(iid)[0]
     mapind = np.zeros_like(
         self.id) + I_ROOT  # non assigned regions are root
     mapind[iid] = iid  # regions present in the list have the same index
     # Starting from the top of the hierarchy, assign all descendants to the mapping
     for i in np.argsort(self.level[iid]):
         descendants = self.descendants(self.id[iid[i]]).id
         _, idesc, _ = np.intersect1d(self.id,
                                      descendants,
                                      return_indices=True)
         mapind[idesc] = iid[i]
     mapind[0] = I_VOID  # void stays void
     # to delateralize the regions, assign the positive index to all mapind elements
     if lateralize is False:
         _, iregion = ismember(np.abs(self.id), self.id)
         mapind = mapind[iregion]
     return mapind
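
The method above relies on ismember's MATLAB-like contract: lia is a boolean mask over the first argument and locb indexes the second argument for the matched elements, so that a[lia] == b[locb]. Below is a minimal, self-contained sketch of that contract in plain NumPy (illustrative only, not the brainbox.numerical implementation):

import numpy as np

def ismember_sketch(a, b):
    # toy stand-in for brainbox.numerical.ismember, not the library code
    lia = np.isin(a, b)                               # which elements of a are found in b
    order = np.argsort(b, kind='stable')
    locb = order[np.searchsorted(b[order], a[lia])]   # index into b for each matched element
    return lia, locb

a = np.array([1, 997, 8, 997, 315])   # e.g. self.id
b = np.array([8, 315])                # e.g. new_map
lia, locb = ismember_sketch(a, b)
assert np.array_equal(a[lia], b[locb])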
Example #2
def multiple_spike_trains(firing_rates=None,
                          rec_len_secs=1000,
                          cluster_ids=None,
                          amplitude_noise=20 * 1e-6):
    """
    :param firing_rates: list or np.array of firing rates (spikes per second)
    :param rec_len_secs: recording length in seconds
    :return: spike_times, spike_amps, spike_clusters
    """
    if firing_rates is None:
        firing_rates = np.random.randint(150, 600, 10)
    if cluster_ids is None:
        cluster_ids = np.arange(firing_rates.size)
    ca = np.exp(np.random.normal(5.5, 0.5,
                                 firing_rates.size)) / 1e6  # output is in V
    st = np.empty(0)
    sc = np.empty(0)
    for i, firing_rate in enumerate(firing_rates):
        t = generate_spike_train(firing_rate=firing_rate,
                                 rec_len_secs=rec_len_secs)
        st = np.r_[st, t]
        sc = np.r_[sc, np.zeros(t.size, dtype=np.int32) + cluster_ids[i]]

    ordre = st.argsort()
    st = st[ordre]
    sc = np.int32(sc[ordre])
    _, isc = ismember(sc,
                      cluster_ids)  # cluster ids may be arbitrary: re-index
    sa = np.maximum(ca[isc] + np.random.randn(st.size) * amplitude_noise,
                    25 * 1e-6)
    return st, sa, sc
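
A minimal usage sketch with the generator above (and its generate_spike_train dependency) in scope; the input values are illustrative:

import numpy as np

np.random.seed(0)
st, sa, sc = multiple_spike_trains(firing_rates=np.array([5, 20, 50]),
                                   rec_len_secs=100,
                                   cluster_ids=np.array([2, 7, 9]))
assert st.size == sa.size == sc.size
assert np.all(np.diff(st) >= 0)           # spike times are returned sorted
assert set(np.unique(sc)) <= {2, 7, 9}    # clusters keep the ids they were given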
Example #3
def test_clusters_metrics():
    np.random.seed(54)
    rec_length = 1000
    frs = np.array([3, 100, 80, 40])  # firing rates
    cid = [0, 1, 3, 4]  # here we make sure one of the clusters has no spike
    t, a, c = multiple_spike_trains(firing_rates=frs,
                                    rec_len_secs=rec_length,
                                    cluster_ids=cid)
    d = np.sin(2 * np.pi * c / rec_length *
               t) * 100  # sinusoidal drift whose frequency is driven by the cluster id

    def _assertions(dfm, idf, target_cid):
        # dfm: qc dataframe, idf: indices of existing clusters in dfm, target_cid: expected cluster ids
        assert np.allclose(dfm['amp_median'][idf] / np.exp(5.5) * 1e6,
                           1,
                           rtol=1.1)
        assert np.allclose(dfm['amp_std_dB'][idf] / 20 * np.log10(np.exp(0.5)),
                           1,
                           rtol=1.1)
        assert np.allclose(dfm['drift'][idf],
                           np.array(cid) * 100 * 4 * 3.6,
                           rtol=1.1)
        assert np.allclose(dfm['firing_rate'][idf], frs, rtol=1.1)
        assert np.allclose(dfm['cluster_id'], target_cid)

    # check with missing clusters
    dfm = quick_unit_metrics(c,
                             t,
                             a,
                             d,
                             cluster_ids=np.arange(5),
                             tbounds=[100, 900])
    idf, _ = ismember(np.arange(5), cid)
    _assertions(dfm, idf, np.arange(5))
Example #4
    def test_uuids_intersections(self):
        ntotal = 500
        nsub = 17
        nadd = 3

        eids = uuid2np([uuid.uuid4() for _ in range(ntotal)])

        np.random.seed(42)
        isel = np.floor(np.argsort(np.random.random(nsub)) / nsub *
                        ntotal).astype(np.int16)
        sids = np.r_[eids[isel, :],
                     uuid2np([uuid.uuid4() for _ in range(nadd)])]
        np.random.shuffle(sids)

        # check the intersection
        v, i0, i1 = intersect2d(eids, sids)
        assert np.all(eids[i0, :] == sids[i1, :])
        assert np.all(np.sort(isel) == np.sort(i0))

        v_, i0_, i1_ = np.intersect1d(eids[:, 0],
                                      sids[:, 0],
                                      return_indices=True)
        assert np.setxor1d(v_, v[:, 0]).size == 0
        assert np.setxor1d(i0, i0_).size == 0
        assert np.setxor1d(i1, i1_).size == 0

        for a, b in zip(ismember2d(sids, eids),
                        ismember(sids[:, 0], eids[:, 0])):
            assert np.all(a == b)

        # check conversion to numpy back and forth
        uuids = [uuid.uuid4() for _ in np.arange(4)]
        np_uuids = uuid2np(uuids)
        assert np2uuid(np_uuids) == uuids
Example #5
def remap(ids, source='Allen', dest='Beryl', output='acronym'):
    br = BrainRegions()
    _, inds = ismember(ids, br.id[br.mappings[source]])
    ids = br.id[br.mappings[dest][inds]]
    if output == 'id':
        return ids
    elif output == 'acronym':
        return br.get(ids)['acronym']
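
A usage sketch of remap; the input ids below are placeholders, not values checked against the Allen ontology. Note that the variant in Example #10 further down reuses a module-level BrainRegions instance instead of constructing one per call, which is cheaper when remapping repeatedly:

import numpy as np

allen_ids = np.array([262, 303])   # placeholder Allen region ids (illustrative)
beryl_ids = remap(allen_ids, source='Allen', dest='Beryl', output='id')
acronyms = remap(allen_ids, source='Allen', dest='Beryl', output='acronym')
# both outputs have one entry per input id found in the source mapping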
Example #6
File: one.py  Project: mainenlab/ibllib
 def _make_dataclass_offline(self, eid, dataset_types=None, cache_dir=None, **kwargs):
     if self._cache.size == 0:
         return SessionDataInfo()
     # select the session
     npeid = parquet.str2np(eid)[0]
     df = self._cache[self._cache['eid_0'] == npeid[0]]
     df = df[df['eid_1'] == npeid[1]]
     # select datasets
     df = df[ismember(df['dataset_type'], dataset_types)[0]]
     return SessionDataInfo.from_pandas(df, self._get_cache_dir(cache_dir))
Example #7
File: atlas.py  Project: mainenlab/ibllib
 def _navigate_tree(self, ids, direction='down'):
     """
     Private method to navigate the tree and get all related objects either up or down
     :param ids:
     :param direction:
     :return: Bunch
     """
     indices = ismember(self.id, ids)[0]
     count = np.sum(indices)
     while True:
         if direction == 'down':
             indices |= ismember(self.parent, self.id[indices])[0]
         elif direction == 'up':
             indices |= ismember(self.id, self.parent[indices])[0]
         else:
             raise ValueError("direction should be either 'up' or 'down'")
         if count == np.sum(indices):  # last iteration didn't find any match
             break
         else:
             count = np.sum(indices)
     return self.get(self.id[indices])
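
The while loop above grows the selection until ismember stops finding new parents or children, i.e. it runs the traversal to a fixed point. Below is a self-contained toy of the 'down' case on a three-level hierarchy, with np.isin standing in for ismember's boolean output (a sketch, not the atlas code):

import numpy as np

ids = np.array([1, 10, 11, 100, 101, 110])
parent = np.array([0, 1, 1, 10, 10, 11])      # parent id of each entry of ids

indices = np.isin(ids, [10])                  # start the 'down' traversal from region 10
count = indices.sum()
while True:
    indices |= np.isin(parent, ids[indices])  # add the direct children of the current set
    if count == indices.sum():                # no new match: every descendant is included
        break
    count = indices.sum()

assert np.array_equal(ids[indices], [10, 100, 101])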
Example #8
    def find_traj_is_best(self, provenance='Histology track'):
        val = PROV_2_VAL[provenance]
        next_provenance = VAL_2_PROV[val + 20]

        if 'traj' not in self.traj[provenance].keys():
            self.get_traj_for_provenance(provenance)
        if 'traj' not in self.traj[next_provenance].keys():
            self.get_traj_for_provenance(next_provenance)

        isin, _ = ismember(self.traj[provenance]['ins'],
                           self.traj[next_provenance]['ins'])
        self.traj[provenance]['is_best'] = np.where(np.invert(isin))[0]

        # Special exception for planned provenance
        if provenance == 'Planned':
            next_provenance = VAL_2_PROV[val + 40]
            if 'traj' not in self.traj[next_provenance].keys():
                self.get_traj_for_provenance(next_provenance)
            isin, _ = ismember(self.traj[provenance]['ins'][self.traj[provenance]['is_best']],
                               self.traj[next_provenance]['ins'])
            self.traj[provenance]['is_best'] = (self.traj[provenance]['is_best']
                                                [np.where(np.invert(isin))[0]])
Example #9
 def test_ismember2d_uuids(self):
     nb = 20
     na = 500
     np.random.seed(42)
     a = np.random.randint(0, nb + 3, na)
     b = np.arange(nb)
     lia, locb = bnum.ismember(a, b)
     bb = np.random.randint(low=np.iinfo(np.int64).min,
                            high=np.iinfo(np.int64).max,
                            size=(nb, 2),
                            dtype=np.int64)
     aa = np.zeros((na, 2), dtype=np.int64)
     aa[lia, :] = bb[locb, :]
     lia_, locb_ = bnum.ismember2d(aa, bb)
     assert np.all(lia == lia_) & np.all(locb == locb_)
     bb[:, 0] = 0
     aa[:, 0] = 0
     # if the first column is equal, the distinction has to be made on the second column
     assert np.unique(bb[:, 1]).size == nb
     lia_, locb_ = bnum.ismember2d(aa, bb)
     assert np.all(lia == lia_) & np.all(locb == locb_)
Example #10
def remap(ids, source='Allen', dest='Beryl'):
    _, inds = ismember(ids, br.id[br.mappings[source]])
    return br.id[br.mappings[dest][inds]]
Example #11
 def __init__(self,
              res_um=25,
              brainmap='Allen',
              scaling=np.array([1, 1, 1]),
              mock=False,
              hist_path=None):
     """
     :param res_um: 10, 25 or 50 um
     :param brainmap: defaults to 'Allen', see ibllib.atlas.BrainRegion for re-mappings
     :param scaling: scale factor along ml, ap, dv for squeeze and stretch ([1, 1, 1])
     :param mock: for testing purpose
     :param hist_path: optional path to an image volume file to use instead of the default template
     :return: atlas.BrainAtlas
     """
     par = params.read('one_params')
     FLAT_IRON_ATLAS_REL_PATH = Path('histology', 'ATLAS', 'Needles',
                                     'Allen')
     LUT_VERSION = "v01"  # version 01 is the lateralized version
     regions = BrainRegions()
     xyz2dims = np.array([1, 0, 2])  # this is the c-contiguous ordering
     dims2xyz = np.array([1, 0, 2])
     # we use Bregma as the origin
     self.res_um = res_um
     ibregma = (ALLEN_CCF_LANDMARKS_MLAPDV_UM['bregma'] / self.res_um)
     dxyz = self.res_um * 1e-6 * np.array([1, -1, -1]) * scaling
     if mock:
         image, label = [
             np.zeros((528, 456, 320), dtype=np.int16) for _ in range(2)
         ]
         label[:, :, 100:105] = 1327  # 1327 is the lookup index of the retina region (Allen id 304325711), not an id
     else:
         path_atlas = Path(par.CACHE_DIR).joinpath(FLAT_IRON_ATLAS_REL_PATH)
         file_image = hist_path or path_atlas.joinpath(
             f'average_template_{res_um}.nrrd')
         # get the image volume
         if not file_image.exists():
             _download_atlas_flatiron(file_image, FLAT_IRON_ATLAS_REL_PATH,
                                      par)
         # get the remapped label volume
         file_label = path_atlas.joinpath(f'annotation_{res_um}.nrrd')
         if not file_label.exists():
             _download_atlas_flatiron(file_label, FLAT_IRON_ATLAS_REL_PATH,
                                      par)
         file_label_remap = path_atlas.joinpath(
             f'annotation_{res_um}_lut_{LUT_VERSION}.npz')
         if not file_label_remap.exists():
             label = self._read_volume(file_label)
             _logger.info("computing brain atlas annotations lookup table")
             # lateralize atlas: for this, the regions of the left hemisphere have primary
             # keys opposite to the normal ones
             lateral = np.zeros(label.shape[xyz2dims[0]])
             lateral[int(np.floor(ibregma[0]))] = 1
             lateral = np.sign(
                 np.cumsum(lateral)[np.newaxis, :, np.newaxis] - 0.5)
             label = label * lateral
             _, im = ismember(label, regions.id)
             label = np.reshape(im.astype(np.uint16), label.shape)
             _logger.info(f"saving {file_label_remap} ...")
             np.savez_compressed(file_label_remap, label)
         # loads the files
         label = self._read_volume(file_label_remap)
         image = self._read_volume(file_image)
     super().__init__(image,
                      label,
                      dxyz,
                      regions,
                      ibregma,
                      dims2xyz=dims2xyz,
                      xyz2dims=xyz2dims)
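
The _, im = ismember(label, regions.id) step above turns a volume of signed Allen ids into a volume of row indices into the lateralized region table, so each voxel can later be looked up by position. A toy sketch of that conversion with plain NumPy in place of ismember (values illustrative, not the atlas build):

import numpy as np

region_ids = np.array([0, 997, -997, 304325711, -304325711])    # toy lateralized id table
label = np.array([[997, -997], [304325711, 0]])                 # toy annotation volume

order = np.argsort(region_ids)
im = order[np.searchsorted(region_ids[order], label.ravel())]   # row index for each voxel
label_idx = im.reshape(label.shape).astype(np.uint16)

assert np.array_equal(region_ids[label_idx], label)             # indices recover the ids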
Example #12
def quick_unit_metrics(spike_clusters,
                       spike_times,
                       spike_amps,
                       spike_depths,
                       params=METRICS_PARAMS,
                       cluster_ids=None,
                       tbounds=None):
    """
    Computes single unit metrics from only the spike times, amplitudes, and
    depths for a set of units.

    Metrics computed:
        'amp_max',
        'amp_min',
        'amp_median',
        'amp_std_dB',
        'contamination',
        'contamination_alt',
        'drift',
        'missed_spikes_est',
        'noise_cutoff',
        'presence_ratio',
        'presence_ratio_std',
        'slidingRP_viol',
        'spike_count'

    Parameters (see the METRICS_PARAMS constant)
    ----------
    spike_clusters : ndarray_like
        A vector of the unit ids for a set of spikes.
    spike_times : ndarray_like
        A vector of the timestamps for a set of spikes.
    spike_amps : ndarray_like
        A vector of the amplitudes for a set of spikes.
    spike_depths : ndarray_like
        A vector of the depths for a set of spikes.
    cluster_ids : list or ndarray (optional)
        The cluster ids to compute metrics for. If not all clusters are represented in
        spike_clusters (i.e. a cluster has no spikes), passing this ensures the output size
        is consistent with the input arrays.
    tbounds : list or 2-element array (optional)
        Time bounds (in seconds) restricting the spikes used for the metrics computation.
    params : dict (optional)
        Parameters used for computing some of the metrics in the function:
            'presence_window': float
                The time window (in s) used to look for spikes when computing the presence ratio.
            'refractory_period': float
                The refractory period used when computing isi violations and the contamination
                estimate.
            'min_isi': float
                The minimum interspike-interval (in s) for counting duplicate spikes when computing
                the contamination estimate.
            'spks_per_bin_for_missed_spks_est': int
                The number of spikes per bin used to compute the spike amplitude pdf for a unit,
                when computing the missed spikes estimate.
            'std_smoothing_kernel_for_missed_spks_est': float
                The standard deviation for the gaussian kernel used to compute the spike amplitude
                pdf for a unit, when computing the missed spikes estimate.
            'min_num_bins_for_missed_spks_est': int
                The minimum number of bins used to compute the spike amplitude pdf for a unit,
                when computing the missed spikes estimate.

    Returns
    -------
    r : bunch
        A bunch whose keys are the computed spike metrics.

    Notes
    -----
    This function is called by `ephysqc.unit_metrics_ks2` which is called by `spikes.ks2_to_alf`
    during alf extraction of an ephys dataset in the ibl ephys extraction pipeline.

    Examples
    --------
    1) Compute quick metrics from a ks2 output directory:
        >>> from ibllib.ephys.ephysqc import phy_model_from_ks2_path
        >>> m = phy_model_from_ks2_path(path_to_ks2_out)
        >>> cluster_ids = m.spike_clusters
        >>> ts = m.spike_times
        >>> amps = m.amplitudes
        >>> depths = m.depths
        >>> r = bb.metrics.quick_unit_metrics(cluster_ids, ts, amps, depths)
    """
    metrics_list = [
        'cluster_id', 'amp_max', 'amp_min', 'amp_median', 'amp_std_dB',
        'contamination', 'contamination_alt', 'drift', 'missed_spikes_est',
        'noise_cutoff', 'presence_ratio', 'presence_ratio_std',
        'slidingRP_viol', 'spike_count'
    ]
    from brainbox.numerical import between_sorted
    if tbounds:
        ispi = between_sorted(spike_times, tbounds)
        spike_times = spike_times[ispi]
        spike_clusters = spike_clusters[ispi]
        spike_amps = spike_amps[ispi]
        spike_depths = spike_depths[ispi]

    if cluster_ids is None:
        cluster_ids = np.unique(spike_clusters)
    nclust = cluster_ids.size

    r = Bunch({k: np.full((nclust, ), np.nan) for k in metrics_list})
    r['cluster_id'] = cluster_ids

    # vectorized computation of basic metrics such as presence ratio and firing rate
    tmin = spike_times[0]
    tmax = spike_times[-1]
    presence_ratio = bincount2D(spike_times,
                                spike_clusters,
                                xbin=params['presence_window'],
                                ybin=cluster_ids,
                                xlim=[tmin, tmax])[0]
    r.presence_ratio = np.sum(presence_ratio > 0,
                              axis=1) / presence_ratio.shape[1]
    r.presence_ratio_std = np.std(presence_ratio, axis=1)
    r.spike_count = np.sum(presence_ratio, axis=1)
    r.firing_rate = r.spike_count / (tmax - tmin)

    # computing amplitude statistical indicators by aggregating over cluster id
    camp = pd.DataFrame(np.c_[spike_amps, 20 * np.log10(spike_amps),
                              spike_clusters],
                        columns=['amps', 'log_amps', 'clusters'])
    camp = camp.groupby('clusters')
    ir, ib = ismember(r.cluster_id, camp.clusters.unique())
    r.amp_min[ir] = np.array(camp['amps'].min())
    r.amp_max[ir] = np.array(camp['amps'].max())
    # this is the geometric median
    r.amp_median[ir] = np.array(10**(camp['log_amps'].median() / 20))
    r.amp_std_dB[ir] = np.array(camp['log_amps'].std())

    # loop over each cluster to compute the rest of the metrics
    for ic in np.arange(nclust):
        # slice the spike_times array
        ispikes = spike_clusters == cluster_ids[ic]
        if np.all(~ispikes):  # if this cluster has no spikes, continue
            continue
        ts = spike_times[ispikes]
        amps = spike_amps[ispikes]
        depths = spike_depths[ispikes]

        # compute metrics
        r.contamination_alt[ic] = contamination_alt(
            ts, rp=params['refractory_period'])
        r.contamination[ic], _ = contamination(ts,
                                               tmin,
                                               tmax,
                                               rp=params['refractory_period'],
                                               min_isi=params['min_isi'])
        r.slidingRP_viol[ic] = slidingRP_viol(
            ts,
            bin_size=params['bin_size'],
            thresh=params['RPslide_thresh'],
            acceptThresh=params['acceptable_contamination'])
        r.noise_cutoff[ic] = noise_cutoff(
            amps,
            quartile_length=params['nc_quartile_length'],
            n_bins=params['nc_bins'],
            n_low_bins=params['nc_n_low_bins'])
        r.missed_spikes_est[ic], _, _ = missed_spikes_est(
            amps,
            spks_per_bin=params['spks_per_bin_for_missed_spks_est'],
            sigma=params['std_smoothing_kernel_for_missed_spks_est'],
            min_num_bins=params['min_num_bins_for_missed_spks_est'])

        # wonder if there is a need to low-cut this
        r.drift[ic] = np.sum(np.abs(np.diff(depths))) / (tmax - tmin) * 3600

    r.label = compute_labels(r)
    return r
Example #13
 def _check_ismember(a, b, lia_, locb_):
     lia, locb = bnum.ismember(a, b)
     self.assertTrue(np.all(a[lia] == b[locb]))
     self.assertTrue(np.all(lia_ == lia))
     self.assertTrue(np.all(locb_ == locb))
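
The helper above references self, so in its original context it is presumably nested inside a unittest.TestCase method. A minimal sketch of such a wrapper (the expected lia_/locb_ values assume the a[lia] == b[locb] contract asserted above):

import unittest
import numpy as np
import brainbox.numerical as bnum

class TestIsMember(unittest.TestCase):
    def test_ismember(self):
        def _check_ismember(a, b, lia_, locb_):
            lia, locb = bnum.ismember(a, b)
            self.assertTrue(np.all(a[lia] == b[locb]))
            self.assertTrue(np.all(lia_ == lia))
            self.assertTrue(np.all(locb_ == locb))

        a = np.array([3, 1, 4, 1, 5])
        b = np.array([1, 4])
        _check_ismember(a, b,
                        lia_=np.array([False, True, True, True, False]),
                        locb_=np.array([0, 1, 0]))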