def append_duplicated_spikes(data_dir, output_dir, groupnum, idxs,
        n_samples=24):
    """Appends a fake neuron of duplicated spikes.

    This is useful for testing whether some of the spikes are all in one
    part of the cluster, which might suggest drift or bad clustering.

    data_dir : klusters directory of original data (will not be modified)
    output_dir : klusters directory containing a copy of the original data
        (THIS ONE WILL BE MODIFIED!) Copy all clu, fet, res, etc. files
        over to the new directory first.
    groupnum : tetrode number, i.e., extension of the klusters files to modify
    idxs : indexes of spikes to duplicate as a new cluster
        This function doesn't know which unit you are trying to clone (if
        any), so the indexes should be indexes into ALL of the spikes from
        the group.

    It will extract the times, features, and waveforms of the indexed spikes,
    then append them to the end of the same files in output_dir. The new
    cluster has an ID one greater than the previous max.
    """
    # find files
    kfs1 = KKFileSchema.coerce(data_dir)
    kfs2 = KKFileSchema.coerce(output_dir)

    # Duplicate clu: the duplicated spikes become a new cluster whose ID
    # is one greater than the current maximum
    clu = kkpandas.kkio.read_clufile(kfs1.clufiles[groupnum])
    newclunum = clu.max() + 1
    newclu = pandas.concat(
        [clu, pandas.Series(newclunum * np.ones(len(idxs)), dtype=np.int)],
        ignore_index=True)
    kkpandas.kkio.write_clufile(newclu, kfs2.clufiles[groupnum])

    # Duplicate res
    res = kkpandas.kkio.read_resfile(kfs1.resfiles[groupnum])
    newres = pandas.concat([res, res.ix[idxs]], ignore_index=True)
    kkpandas.kkio.write_resfile(newres, kfs2.resfiles[groupnum])

    # Duplicate fet
    fet = kkpandas.kkio.read_fetfile(kfs1.fetfiles[groupnum])
    newfet = pandas.concat([fet, fet.ix[idxs]], ignore_index=True)
    kkpandas.kkio.write_fetfile(newfet, kfs2.fetfiles[groupnum])

    # Duplicate spk, passing the n_samples argument through rather than a
    # hardcoded value so non-default spike lengths work
    spk = kkpandas.kkio.read_spkfile(kfs1.spkfiles[groupnum],
        n_samples=n_samples, n_spikes=fet.shape[0])
    newspk = np.concatenate([spk, spk[idxs, :]], axis=0)
    kkpandas.kkio.write_spkfile(newspk, kfs2.spkfiles[groupnum])
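# A minimal usage sketch for append_duplicated_spikes, assuming a klusters
# directory './sorted' whose clu/fet/res/spk files have already been copied
# to './sorted_test'. The paths, group number, and spike indexes here are
# hypothetical, for illustration only.
def _example_append_duplicated_spikes():
    # Clone the first 100 spikes of tetrode group 1 as a fake cluster, then
    # inspect it in klusters to see where those spikes fall in feature space
    idxs = np.arange(100)
    append_duplicated_spikes('./sorted', './sorted_test', groupnum=1,
        idxs=idxs)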
def load_spikes(data_dir, group, samp_rate, n_samp, n_chan):
    '''Loads the feature, cluster, and spike files in KlustaKwik format and
    pulls out the features, spike times, and spike waveforms for each cluster.

    Arguments
    ---------
    data_dir : path to the directory with the KlustaKwik files
    group : the group number you want to load
    samp_rate : the sampling rate of the recording in samples per second
    n_samp : number of samples for each stored spike in the spike file
    n_chan : number of channels stored in the spike file

    Returns
    -------
    out : dict of numpy structured arrays
        A dictionary of the clusters. The keys are the cluster numbers.
        The values are numpy structured arrays with fields 'times',
        'waveforms', and 'pca' which give the timestamp, tetrode waveform,
        and PCA-reduced values, respectively, for each spike in the cluster.
    '''
    from KKFileSchema import KKFileSchema
    import kkio

    # Get the clustered data from KlustaKwik files
    kfs = KKFileSchema.coerce(data_dir)

    # Get the spike features, time stamps, cluster labels, and waveforms
    feat = kkio.read_fetfile(kfs.fetfiles[group])
    features = feat.values[:, :-1]
    time_stamps = feat.time.values
    cluster_labels = kkio.read_clufile(kfs.clufiles[group])
    spikes = kkio.read_spkfile(kfs.spkfiles[group])

    # Reshape the flat spike samples into one row per spike, then reorder
    # each row from sample-major to channel-major
    spikes = spikes.reshape((len(spikes) / (n_chan * n_samp),
        (n_chan * n_samp)))
    for ii, spike in enumerate(spikes):
        spikes[ii] = spike.reshape((n_chan, n_samp),
            order='F').reshape(n_chan * n_samp)

    # Convert integer samples to voltage
    INT_TO_VOLT = 4096.0 / 2.0 ** 15  # uV per bit
    spikes = spikes * INT_TO_VOLT

    # Group the spike indices by cluster id
    cluster_ids = np.unique(cluster_labels.values)
    cluster_indices = {cid: np.where(cluster_labels.values == cid)[0]
        for cid in cluster_ids}

    # Build one structured array per cluster, sorted by spike time
    clusters = dict.fromkeys(cluster_ids)
    dtypes = [('times', 'f8'), ('waveforms', 'f8', n_chan * n_samp),
        ('pca', 'f8', len(features[0]))]
    for cid, indices in cluster_indices.iteritems():
        clusters[cid] = np.zeros(len(indices), dtype=dtypes)
        clusters[cid]['times'] = time_stamps[indices] / np.float(samp_rate)
        clusters[cid]['waveforms'] = spikes[indices]
        clusters[cid]['pca'] = features[indices]
        clusters[cid].sort(order=['times'])
    return clusters
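# A usage sketch for the structured-array version of load_spikes, assuming a
# hypothetical directory './sorted' recorded at 30 kHz with 24 samples per
# spike on 4 channels. Field access on the returned arrays is plain numpy.
def _example_load_spikes_structured():
    clusters = load_spikes('./sorted', group=1, samp_rate=30000.,
        n_samp=24, n_chan=4)
    for cid, cluster in clusters.iteritems():
        # Each value is a structured array, already sorted by spike time
        first_spike_s = cluster['times'][0]
        mean_waveform = cluster['waveforms'].mean(axis=0)
        n_features = cluster['pca'].shape[1]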
def flush(kfs_or_path, verbose=False):
    """Remove any memoized file (basename.kkp) from the directory."""
    # Coerce to file schema
    kfs = KKFileSchema.coerce(kfs_or_path)

    # Find the memoized file
    to_delete = kfs.basename + '.kkp'

    # Delete it if it exists
    if os.path.exists(to_delete):
        if verbose:
            print "deleting", to_delete
        os.remove(to_delete)
    else:
        if verbose:
            print "no memoized files to delete"
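# Usage sketch: drop any stale memoized kkp file before reloading with fresh
# parameters, assuming a hypothetical klusters directory './sorted'.
def _example_flush():
    flush('./sorted', verbose=True)
    data = from_KK('./sorted', save_memoized=True)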
def read_all_from_group(basename='.', group=1, n_samples=-1, n_spikes=-1,
        n_channels=-1):
    """Read the res, clu, fet, and spk files for one group.

    Returns a dict with keys 'res', 'clu', 'fet', and 'spk' containing the
    loaded spike times, cluster labels, features, and waveforms.
    """
    d = {}
    kfs = KKFileSchema.coerce(basename)

    res = read_resfile(kfs.resfiles[group])
    d['res'] = res
    clu = read_clufile(kfs.clufiles[group])
    d['clu'] = clu
    fet = read_fetfile(kfs.fetfiles[group])
    d['fet'] = fet

    # If the spike count wasn't specified, infer it from the res file
    if n_spikes == -1:
        n_spikes = len(res)
    spk = read_spkfile(kfs.spkfiles[group], n_spikes=n_spikes,
        n_channels=n_channels, n_samples=n_samples)
    d['spk'] = spk

    return d
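# Usage sketch for read_all_from_group, assuming a hypothetical directory
# './sorted' with 4-channel spk files. n_spikes is inferred from the res
# file, so only the channel count (or sample count) need be given.
def _example_read_all_from_group():
    d = read_all_from_group('./sorted', group=1, n_channels=4)
    res, clu, fet, spk = d['res'], d['clu'], d['fet'], d['spk']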
def load_spiketimes(kfs_or_path, group, fs=None):
    """Given KKFileSchema or path to one, load spike times from group

    Returns Series
    """
    kfs = KKFileSchema.coerce(kfs_or_path)

    # check if res files exist, which are faster to load
    if 'res' in kfs.available_filetypes:
        spiketimes = read_resfile(kfs.resfiles[group])
    elif 'fet' in kfs.available_filetypes:
        spiketimes = read_fetfile(kfs.fetfiles[group])[SPIKE_TIME_COLUMN_NAME]
    else:
        raise ValueError("no available method to grab spike times")

    # optionally convert to seconds
    if fs:
        spiketimes = spiketimes / float(fs)

    return spiketimes
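# Usage sketch for load_spiketimes, assuming a hypothetical directory
# './sorted' sampled at 30 kHz. Passing fs converts the integer sample
# indices to seconds.
def _example_load_spiketimes():
    samples = load_spiketimes('./sorted', group=1)             # in samples
    seconds = load_spiketimes('./sorted', group=1, fs=30000.)  # in seconds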
def from_KK(basename='.', groups_to_get=None, group_multiplier=None, fs=None,
        verify_unique_clusters=True, add_group_as_column=True,
        load_memoized=False, save_memoized=False,
        also_get_features=False, also_get_waveforms=False,
        n_samples=-1, n_channels=-1):
    """Main function for loading KlustaKwik data.

    basename : path to, or basename of, files
    groups_to_get : int or list of groups to get, otherwise get all groups
    group_multiplier : if None, the cluster ids are used as-is
        if int, then the group number times this multiplier is added to
        the cluster id. This is useful if groups contain the same cluster
        ids but you want them to have unique labels.
    fs : if None, the times are returned as integer numbers of samples
        otherwise, they are divided by this number
    verify_unique_clusters : if True, check that there are no overlapping
        cluster ids across groups
    add_group_as_column : if True, then the returned value has a column
        for the group from which the spike came.
    also_get_features, also_get_waveforms : if True, then the returned
        value has columns for these as well.
    n_samples, n_channels : only necessary if also_get_waveforms. Only one
        of these two parameters is necessary in that case.

    Memoization
    ---
    Loading is faster with the binary pandas save and load functions than
    with the ASCII KlustaKwik format. For this reason you can specify that
    the data be saved as a pandas file, or loaded from a pandas file.

    These options now default to False because of the potential for
    accidental misuse: no checking is done whether the current parameters
    are the same as the ones in effect when the memoization was done.

    load_memoized : if a file like basename.kkp exists, load this DataFrame
        and return. Note that all other parameters (except basename) are
        ignored.
    save_memoized : the data will be written to a file like basename.kkp
        after loading.

    Returns: DataFrame with columns 'unit', 'time', and optionally 'group'
    """
    memoized_filename = None  # to be determined later, if necessary

    # load files like basename
    try:
        kfs = KKFileSchema.coerce(basename)
    except ValueError:
        # This occurs when no spike files are found, but there might still
        # be kkp files, so fall back on loading the memoized data
        load_memoized = True
        memoized_filename = glob.glob(os.path.join(basename, '*.kkp'))[0]

    # try to load memoized
    if load_memoized:
        if memoized_filename is None:
            memoized_filename = kfs.basename + '.kkp'
        try:
            data = pandas.load(memoized_filename)
            return_early = True
        except IOError:
            return_early = False
        if return_early:
            return data

    # which groups to get
    if groups_to_get:
        if not hasattr(groups_to_get, '__len__'):
            groups_to_get = [groups_to_get]
    else:
        groups_to_get = kfs.groups

    # get each group
    group_d = {}
    for group in groups_to_get:
        spiketimes = load_spiketimes(kfs, group, fs)
        if 'clu' in kfs.available_filetypes:
            unit_ids = read_clufile(kfs.clufiles[group])
        else:
            # No clu file, so label every spike with its group number.
            # Use a named Series ('unit', matching read_clufile) so that
            # unit_ids.name works in the DataFrame construction below.
            unit_ids = pandas.Series(
                np.ones(len(spiketimes), dtype=np.int) * group, name='unit')
        if group_multiplier:
            unit_ids += group_multiplier * group

        # concatenate into data frame and add to dict
        if add_group_as_column:
            group_d[group] = pandas.DataFrame({
                spiketimes.name: spiketimes,
                unit_ids.name: unit_ids,
                'group': np.ones(len(spiketimes), dtype=np.int) * group})
        else:
            group_d[group] = pandas.DataFrame({
                spiketimes.name: spiketimes,
                unit_ids.name: unit_ids})

        # optionally get features too
        if also_get_features:
            assert 'fet' in kfs.available_filetypes

            # Read the feature file
            fetfile = kfs.fetfiles[group]
            features = read_fetfile(fetfile, guess_time_column=True,
                return_nfeatures=False)

            # Pop off the time column since we don't need it
            features.pop('time')

            # Concatenate to df for this group
            assert len(features) == len(group_d[group])
            group_d[group] = pandas.concat([group_d[group], features],
                axis=1)

        # optionally get waveforms too
        if also_get_waveforms:
            assert 'spk' in kfs.available_filetypes

            # Read the spike file. We know the number of spikes, but we
            # need either the number of samples or the number of channels.
            spkfile = kfs.spkfiles[group]
            waveforms = read_spkfile(spkfile, n_spikes=len(group_d[group]),
                n_samples=n_samples, n_channels=n_channels)

            # Flatten, convert to dataframe, and concatenate to result
            nsamptot = waveforms.shape[1] * waveforms.shape[2]
            waveforms_df = pandas.DataFrame(
                waveforms.swapaxes(1, 2).reshape(
                    waveforms.shape[0], nsamptot),
                columns=['wf%d' % n for n in range(nsamptot)])
            group_d[group] = pandas.concat([group_d[group], waveforms_df],
                axis=1)

    # optionally check that no cluster id appears in more than one group
    if verify_unique_clusters:
        clusters_by_group = [
            set(np.unique(np.asarray(groupdata.unit)))
            for groupdata in group_d.values()]

        if len(clusters_by_group) > 0:
            # find number of unique clusters
            # will error here if no clusters found
            n_unique_clusters = len(set.union(*clusters_by_group))
            n_total_clusters = sum([len(g) for g in clusters_by_group])
            if n_unique_clusters != n_total_clusters:
                raise ValueError("got %d overlapping clusters" %
                    (n_total_clusters - n_unique_clusters))

    # turn the per-group frames into one giant dataframe for everybody
    sorted_keys = sorted(group_d.keys())
    data = pandas.concat([group_d[key] for key in sorted_keys],
        ignore_index=True)

    if save_memoized:
        # memoized_filename is still None if load_memoized was False
        if memoized_filename is None:
            memoized_filename = kfs.basename + '.kkp'
        data.save(memoized_filename)

    return data
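# Usage sketch for from_KK, assuming a hypothetical directory './sorted'
# with two tetrode groups sampled at 30 kHz. group_multiplier=100 keeps
# cluster ids unique across groups (e.g., cluster 2 of group 1 becomes 102).
def _example_from_KK():
    data = from_KK('./sorted', fs=30000., group_multiplier=100,
        save_memoized=True)
    # data is a DataFrame with columns 'unit', 'time', and 'group';
    # subsequent calls can pass load_memoized=True to skip the ASCII parse
    spike_times_of_unit = data[data.unit == 102].time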
def load_spikes(data_dir, group, samp_rate, n_samp, n_chan):
    '''Loads the feature, cluster, and spike files in KlustaKwik format and
    pulls out the features, spike times, and spike waveforms for each cluster.

    Parameters
    ----------
    data_dir : path to the directory with the KlustaKwik files
    group : the group number you want to load
    samp_rate : the sampling rate of the recording in samples per second
    n_samp : number of samples for each stored spike in the spike file
    n_chan : number of channels stored in the spike file

    Returns
    -------
    out : dict
        out['features'] : dictionary of clustered features
        out['times'] : dictionary of clustered spike times
        out['waveforms'] : dictionary of clustered spike waveforms
    '''
    # Get the clustered data from KlustaKwik files
    kfs = KKFileSchema.coerce(data_dir)

    # Get the features and spike time stamps
    feat = kkio.read_fetfile(kfs.fetfiles[group])
    features = feat.values[:, :-1]
    time_stamps = feat.time.values

    # Get spike cluster labels
    clu = kkio.read_clufile(kfs.clufiles[group])

    # Get the spike waveforms
    spikes = kkio.read_spkfile(kfs.spkfiles[group])

    # Reshape the spike waveforms into a useful form: one row per spike,
    # reordered from sample-major to channel-major
    spikes = spikes.reshape((len(spikes) / (n_chan * n_samp),
        (n_chan * n_samp)))
    for ii, spike in enumerate(spikes):
        spikes[ii] = spike.reshape((n_chan, n_samp),
            order='F').reshape(n_chan * n_samp)

    # Convert spike waveforms into voltage
    spikes = spikes * (8192.0 / 2. ** 16)

    # Cluster numbers
    cluster_nums = np.unique(clu.values)

    # Group the indices by cluster
    cluster_ind = [np.nonzero(clu.values == n)[0] for n in cluster_nums]

    # Get the spike times, features, and waveforms for each cluster
    times = [time_stamps[ind] / np.float(samp_rate) for ind in cluster_ind]
    feats = [features[ind] for ind in cluster_ind]
    spks = [spikes[ind] for ind in cluster_ind]

    # Make dictionaries keyed on the cluster number whose values are the
    # spike times, features, and waveforms in that cluster, respectively
    clustered_times = dict(zip(cluster_nums, times))
    clustered_features = dict(zip(cluster_nums, feats))
    clustered_waveforms = dict(zip(cluster_nums, spks))

    # Make sure the spike times for each cluster are sorted correctly
    for cluster_times in clustered_times.itervalues():
        cluster_times.sort()

    out_dict = {'features': clustered_features,
        'times': clustered_times,
        'waveforms': clustered_waveforms}
    return out_dict
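# Usage sketch for the dict-of-dicts version of load_spikes, under the same
# hypothetical recording parameters as above (30 kHz, 24 samples, 4 channels).
def _example_load_spikes_dicts():
    out = load_spikes('./sorted', group=1, samp_rate=30000., n_samp=24,
        n_chan=4)
    for cid in out['times']:
        times = out['times'][cid]          # sorted spike times in seconds
        waveforms = out['waveforms'][cid]  # one flattened waveform per row
        features = out['features'][cid]    # feature values per spike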
def from_KK(basename='.', groups_to_get=None, group_multiplier=None, fs=None,
        verify_unique_clusters=True, add_group_as_column=True,
        load_memoized=False, save_memoized=False):
    """Main function for loading KlustaKwik data.

    basename : path to, or basename of, files
    groups_to_get : int or list of groups to get, otherwise get all groups
    group_multiplier : if None, the cluster ids are used as-is
        if int, then the group number times this multiplier is added to
        the cluster id. This is useful if groups contain the same cluster
        ids but you want them to have unique labels.
    fs : if None, the times are returned as integer numbers of samples
        otherwise, they are divided by this number
    verify_unique_clusters : if True, check that there are no overlapping
        cluster ids across groups
    add_group_as_column : if True, then the returned value has a column
        for the group from which the spike came.

    Memoization
    ---
    Loading is faster with the binary pandas save and load functions than
    with the ASCII KlustaKwik format. For this reason you can specify that
    the data be saved as a pandas file, or loaded from a pandas file.

    These options now default to False because of the potential for
    accidental misuse: no checking is done whether the current parameters
    are the same as the ones in effect when the memoization was done.

    load_memoized : if a file like basename.kkp exists, load this DataFrame
        and return. Note that all other parameters (except basename) are
        ignored.
    save_memoized : the data will be written to a file like basename.kkp
        after loading.

    Returns: DataFrame with columns 'unit', 'time', and optionally 'group'
    """
    # load files like basename
    kfs = KKFileSchema.coerce(basename)

    # try to load memoized
    memoized_filename = kfs.basename + '.kkp'
    if load_memoized:
        try:
            data = pandas.load(memoized_filename)
            return_early = True
        except IOError:
            return_early = False
        if return_early:
            return data

    # which groups to get
    if groups_to_get:
        if not hasattr(groups_to_get, '__len__'):
            groups_to_get = [groups_to_get]
    else:
        groups_to_get = kfs.groups

    # get each group
    group_d = {}
    for group in groups_to_get:
        spiketimes = load_spiketimes(kfs, group, fs)
        if 'clu' in kfs.available_filetypes:
            unit_ids = read_clufile(kfs.clufiles[group])
        else:
            # No clu file, so label every spike with its group number.
            # Use a named Series ('unit', matching read_clufile) so that
            # unit_ids.name works in the DataFrame construction below.
            unit_ids = pandas.Series(
                np.ones(len(spiketimes), dtype=np.int) * group, name='unit')
        if group_multiplier:
            unit_ids += group_multiplier * group

        # concatenate into data frame and add to dict
        if add_group_as_column:
            group_d[group] = pandas.DataFrame(
                {spiketimes.name: spiketimes,
                unit_ids.name: unit_ids,
                'group': np.ones(len(spiketimes), dtype=np.int) * group})
        else:
            group_d[group] = pandas.DataFrame(
                {spiketimes.name: spiketimes,
                unit_ids.name: unit_ids})

    # optionally check that no cluster id appears in more than one group
    if verify_unique_clusters:
        clusters_by_group = [
            set(np.unique(np.asarray(groupdata.unit)))
            for groupdata in group_d.values()]
        n_unique_clusters = len(set.union(*clusters_by_group))
        n_total_clusters = sum([len(g) for g in clusters_by_group])
        if n_unique_clusters != n_total_clusters:
            raise ValueError("got %d overlapping clusters" %
                (n_total_clusters - n_unique_clusters))

    # turn the per-group frames into one giant dataframe for everybody
    sorted_keys = sorted(group_d.keys())
    data = pandas.concat([group_d[key] for key in sorted_keys],
        ignore_index=True)

    if save_memoized:
        data.save(memoized_filename)

    return data