Example #1
import numpy as np

from spikelearn.data import io, select


def bad_trials(label, threshold=20, only_indexes=True):
    """Flag trials in which any unit's peak activity exceeds `threshold`."""
    data = io.load(label, 'no_smoothing')
    if 'quality' in data.columns:
        bad = select(data, _min_quality=0).full.apply(
            lambda x: np.max(x) > threshold).unstack().any(axis=1)
    else:
        bad = select(data, is_selected=True).full.apply(
            lambda x: np.max(x) > threshold).unstack().any(axis=1)

    if only_indexes:
        return bad[bad.values].index.values
    else:
        return bad
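
A minimal usage sketch (dataset name illustrative; SHORTCUTS from spikelearn.data, as in Example #8), mirroring the trial-exclusion pattern of Example #13:

# Sketch: drop flagged trials from a loaded session before analysis.
for rat in SHORTCUTS['groups']['eletro']:
    data = io.load(rat, 'medium_smoothed')
    data = data[~data.reset_index().trial.isin(bad_trials(rat)).values]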
Example #2
from spikelearn.data import io, select, to_feature_array

# `ramping_p` and `MIN_QUALITY` are defined elsewhere in the source module.
def select_ramping_neurons(label,
                           tmin=1.5,
                           min_quality=MIN_QUALITY,
                           P_VALUE_RAMP=.05,
                           return_ramping=True):
    """
    Returns the indices of ramping neurons.

    If not return ramping, instead returns non-ramping neurons
    """
    data = io.load(label, 'no_smoothing')
    data = select(data, _min_duration=tmin, _mineq_quality=min_quality)
    fr = to_feature_array(data, subset='cropped')

    rp = lambda df: ramping_p(df.value, df.time)
    p_ramp = fr.reset_index().drop(
        'trial', axis=1).melt(id_vars=['time']).groupby('unit').apply(rp)
    is_ramp = p_ramp < P_VALUE_RAMP
    return is_ramp[is_ramp == return_ramping].index.values
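
For context, a sketch of how this helper is consumed in Example #4 (the session label and dataset name are illustrative):

# Sketch: restrict a session to its ramping units.
lab = SHORTCUTS['groups']['EZ'][0]
data = select(io.load(lab, 'no_smoothing').reset_index(),
              _in_unit=select_ramping_neurons(lab, return_ramping=True)
              ).set_index(['trial', 'unit'])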
Example #3
import numpy as np
import pandas as pd
from scipy.io import loadmat

from spikelearn.data import io, SHORTCUTS


def spikes_behavior_from_mat(filename):
    data = loadmat(filename)

    spikes = data['dados'][0,0][1]
    behavior = data['dados'][0,0][0]

    spikes = pd.DataFrame(
        [[spikes[0, i][0][:, 0], spikes[0, i][0][:, 1]]
         for i in range(spikes.shape[1]) if spikes[0, i][0].shape[1] == 2],
        columns=['times', 'trial'])

    behavior = pd.DataFrame(
        np.transpose(behavior[0, 0][0]),
        columns=['one', 'onset', 'offset', 'zero',
                 'duration', 'sortIdx', 'sortLabel']
    ).drop(['one', 'zero', 'sortIdx', 'sortLabel'], axis=1)
    behavior['trial'] = np.arange(1, behavior.shape[0] + 1)
    behavior = behavior.set_index('trial')

    # Calculate relative spike time
    spikes['trial_time'] = pd.DataFrame(np.transpose(
        [spikes.times[i] - behavior.iloc[spikes.trial[i] - 1].onset.values
         for i in range(spikes.shape[0])]))

    return spikes, behavior

# Load each rat's data and convert it into DataFrames
for rat in SHORTCUTS['groups']['ALL']:
    filepath = io.load(rat, 'spikesorted', getpath=True)
    if rat in SHORTCUTS['groups']['GB']:
        spikes, behavior = spikes_behavior_from_mat(filepath)
    elif rat in SHORTCUTS['groups']['EZ']:
        print(filepath)
        spikes, behavior = spikes_behavior_from_ez(filepath)
    else:
        raise NotImplementedError('This dataset is not included as a special case')

    identifiers = dict(session=rat.split()[0], rat_number=rat.split()[1])
    io.save(spikes, rat, 'spikes', 'interim', **identifiers)
    io.save(behavior, rat, 'behavior', 'interim', **identifiers)
Example #4

conditions = product(DSETS, CLFs, BLINE, SUBSETS, LABELS)
for dset, (clf, clfname), bline, subset, label in conditions:
    savedir = '{}/{}/{}/{}'.format(basedir, clfname, dset, subset)
    print(savedir, '\n', label)
    if not os.path.exists(savedir):
        os.makedirs(savedir)

    alldata = {}
    for ramping in [True, False]:

        if label == 'all':
            print(list(SHORTCUTS['groups']['EZ']))
            dsets = [
                select(io.load(lab, dset).reset_index(),
                       _mineq_quality=MIN_QUALITY,
                       _in_unit=select_ramping_neurons(lab,
                                                       return_ramping=ramping),
                       _min_duration=tmin,
                       _max_duration=tmax).set_index(['trial', 'unit'])
                for lab in SHORTCUTS['groups']['EZ']
            ]
            print([ds.reset_index().trial.nunique() for ds in dsets])
            dsets = [
                ds for ds in dsets if ds.reset_index().trial.nunique() > 0
            ]
            n_trials = np.min(
                [ds.reset_index().trial.nunique() for ds in dsets])
            n_bins = dsets[0][subset].apply(len).min()
            alldata[ramping] = []
Example #5
# Identifier variables
id_vars = ['logC', 'penalty']
# All combinations of identifier values present in the DataFrame.
id_combs = lambda df: product(*(df[id_].unique() for id_ in id_vars))

# Create visualizations
for label, dset in product(SHORTCUTS['groups']['DRRD'], DSETS):
    print(label, dset)
    savedir = fsavedir(label, dset)
    if not os.path.exists(savedir):
        os.makedirs(savedir)

    # Loading data
    similarities = pd.read_csv(sim_filename(label,
                                            dset)).set_index(['unit', 'trial'])
    units = similarities.reset_index().unit.unique()
    behav = io.load(label, 'behav_stats').reset_index()
    behav = select(behav, trial_in_=similarities.reset_index()['trial'])

    # One image for each neuron
    for unit in units:
        fig = plt.figure()
        sns.heatmap(similarities.loc[unit])
        plt.title('Unit {}, {}'.format(unit, label))
        plt.savefig('{}/sim_evo_unit_{}.png'.format(savedir, unit), dpi=500)
        plt.close(fig)

    # Plus one image for the mean
    fig = plt.figure()
    sns.heatmap(similarities.reset_index().groupby('trial').mean().drop(
        'unit', axis=1))
    plt.title('Similarity with mean across units')
Example #6
# This excerpt begins mid-statement; reconstructed as a standalone call.
scores = cross_validate(clf,
                        df.values,
                        df.reset_index().time,
                        df.reset_index().trial,
                        cv=GroupShuffleSplit(10),
                        scoring=scoring,
                        return_train_score=False)

# dag = DAG_analysis()
# dag.add_step(io.load, dset='wide_smoothed')
# dag.add_step(frankenstein, _min_duration=1.5, is_selected=True, is_tired=False, subset='full')
# dag.add_step_branching_by_parameter(analysis, param_name='clf',
#                                     branch_names=['LDA', 'Bayesian Ridge'],
#                                     param_values=[LinearDiscriminantAnalysis(), BayesianRidge()])

# Merging rats
DR = [io.load(label, 'wide_smoothed') for label in SHORTCUTS['groups']['DRRD']]
EZ = [io.load(label, 'wide_smoothed') for label in SHORTCUTS['groups']['EZ']]

sp_pfc = frankenstein(DR,
                      _min_duration=1.5,
                      is_selected=True,
                      is_tired=False,
                      subset='full')
sp_pfc = sp_pfc[(sp_pfc.reset_index('time').time >= 200).values
                & (sp_pfc.reset_index('time').time < 1300).values]

ez_pfc = frankenstein(EZ,
                      _min_duration=1.5,
                      _min_quality=0,
                      area='PFC',
                      subset='full')
Example #7
ntrials_init_vs_after = 70
n_splits = 50
ntrials_total = 400
SUBSETS = ['cropped', 'full']

conditions = product(DSETS, CLFs, BLINE, SHORTCUTS['groups']['EZ'], SUBSETS)

for dset, (clf, clfname), bline, label, subset in conditions:
    savedir = '{}/{}/{}/{}/{}/{}'.format(basedir, clfname, dset, subset, label,
                                         bline)
    print(savedir)
    if not os.path.exists(savedir):
        os.makedirs(savedir)

    data = io.load(label, dset)
    data = select(data,
                  _mineq_quality=MINQUALITY,
                  _min_duration=tmin,
                  _max_duration=tmax)
    dataPFC = select(data, area='PFC')
    dataSTR = select(data, area='STR')
    print(data.shape, dataPFC.shape, dataSTR.shape)
    baseline = io.load(label, 'baseline')
    if bline:
        data = remove_baseline(to_feature_array(data, subset=subset), baseline,
                               .5)
        dataPFC = remove_baseline(to_feature_array(dataPFC, subset=subset),
                                  baseline, .5)
        dataSTR = remove_baseline(to_feature_array(dataSTR, subset=subset),
                                  baseline, .5)
Example #8
"""
"""
import numpy as np
import pandas as pd

import sys
sys.path.append('.')
from spikelearn.data import io, SHORTCUTS

# Compute behavioral statistics for each rat
for rat in SHORTCUTS['groups']['ALL']:
    behav = io.load(rat, 'behavior')
    behav['intertrial_interval'] = np.hstack(
        (0, behav.onset.values[1:] - behav.offset.values[:-1]))

    if io.dataset_exist(rat, 'tiredness'):
        tiredness = io.load(rat, 'tiredness').values[0, 0]
    else:
        tiredness = float('inf')
    behav['is_tired'] = behav.index > tiredness
    io.save(behav, rat, 'behav_stats', 'interim')
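
The is_tired flag saved here is what the trial selectors in Examples #7 and #13 filter on; a minimal sketch, assuming select from spikelearn.data and an illustrative dataset name:

from spikelearn.data import select

rat = SHORTCUTS['groups']['ALL'][0]
data = select(io.load(rat, 'medium_smoothed'), is_tired=False)  # drop tired trials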
Example #9
from itertools import product
import pickle

from spikelearn.data import io, select, to_feature_array, SHORTCUTS
from spikelearn.models.shuffle_decoding import shuffle_cross_predict
from catboost import CatBoostClassifier
from sklearn.linear_model import BayesianRidge

# `reg` was undefined in this excerpt; presumably a Bayesian ridge regressor.
reg = BayesianRidge()

allres = {}
# DSETS is defined elsewhere in the source module.
for rat, dset in product(SHORTCUTS['groups']['eletro'], DSETS):
    data = select(io.load(rat, dset), _min_duration=.5, is_tired=False)
    tercils = [data.duration.quantile(q) for q in [1/3, 2/3]]

    t1 = to_feature_array(select(data, _max_duration=tercils[0]), subset='full')
    t3 = to_feature_array(select(data, _min_duration=tercils[1]), subset='full')
    res = shuffle_cross_predict(reg, [t1,t3], ['short', 'long'], n_splits=5,
                                problem='regression', feature_scaling='robust')

    allres[(rat, dset)] = res
    # TODO calculate bias and mean bias direction

pickle.dump(allres, open('data/results/warping.pickle', 'wb'))
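
To read the results back later, a minimal sketch using the standard pickle API:

# Reload the saved results dictionary.
with open('data/results/warping.pickle', 'rb') as f:
    allres = pickle.load(f)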
Example #10
DATASETS = ['medium_smoothed']
# Alternatives: ['wide_smoothed', 'medium_smoothed', 'narrow_smoothed']
WHENS = ['init', 'end']
NUM_TRIALS = np.arange(10, 100, 5)
LOGCS = np.linspace(-1.5, 4, 20)

ANALYSIS_NTRIALS = product(DRRD_RATS, DATASETS, WHENS, NUM_TRIALS, [0])
ANALYSIS_REGUL = product(DRRD_RATS, DATASETS, WHENS, [50], LOGCS)
ANALYSIS = chain(ANALYSIS_NTRIALS, ANALYSIS_REGUL)

results = pd.DataFrame()
preds = pd.DataFrame()
acts = pd.DataFrame()
for rat, dataset, when, num_trials, logC in ANALYSIS:
    clf = LogisticRegression(C=10**logC, penalty='l1')
    data = io.load(rat, dataset)
    units = data.groupby('is_selected').get_group(
        True).reset_index().unit.unique()
    data = select(data.reset_index(),
                  maxlen=num_trials * units.shape[0],
                  takefrom=when,
                  is_selected=True,
                  _min_duration=1.5,
                  is_tired=False).set_index(['trial', 'unit'])

    X, y, trial = to_feature_array(data)
    local_preds, local_results = shuffle_val_predict(clf,
                                                     X,
                                                     y,
                                                     trial,
                                                     cv='sh',
Example #11
# Prepare output folders
for dset in DSETS:
    if not os.path.exists(folder + dset):
        os.makedirs(folder + dset)


# Code cleaners
def clean(df):
    return df.drop(['trial', 'init'], axis=1, level=0)


# Run
for label, dset in product(SHORTCUTS['groups']['DRRD'], DSETS):
    subset = 'full' if 'norm' in dset else 'cropped'
    #viz = lambda dset: dset+'_viz' if 'norm' not in dset or 'narrow' in dset else dset
    viz = lambda dset: dset
    data_ = select(io.load(label, viz(dset)),
                   _min_duration=TMIN,
                   is_selected=True)
    data = to_feature_array(data_, False, subset)

    print(label, dset)
    # Pearson similarity
    sim_mats = []
    for unit in data.columns:
        sim_mat = unit_similarity_evolution(data[unit], WSIZE)
        sim_mat['unit'] = unit
        sim_mats.append(sim_mat)
    res_sim = pd.concat(sim_mats)

    # ML prediction comparison
    res_pred = pd.DataFrame()
    res_weights = pd.DataFrame()
Example #12
import sys
sys.path.append('.')
from spikelearn.data import io, SHORTCUTS

for label in SHORTCUTS['groups']['eletro']:
    baseline = io.load(label, 'epoched_spikes').baseline.unstack('unit')
    io.save(baseline, label, 'baseline')
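
The saved baseline is consumed by remove_baseline in later scripts (see Example #13); a minimal sketch, assuming the imports used there and an illustrative dataset name:

# Sketch: baseline-correct a feature array.
label = SHORTCUTS['groups']['eletro'][0]
data = to_feature_array(io.load(label, 'medium_smoothed'), Xyt=False, subset='full')
data = remove_baseline(data, io.load(label, 'baseline'), .5)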
Example #13
def bad_trials(label, threshold=20, only_indexes=True):
    # Same helper as in Example #1 (this excerpt originally began mid-function).
    data = io.load(label, 'no_smoothing')
    if 'quality' in data.columns:
        bad = select(data, _min_quality=0).full.apply(
            lambda x: np.max(x) > threshold).unstack().any(axis=1)
    else:
        bad = select(data, is_selected=True).full.apply(
            lambda x: np.max(x) > threshold).unstack().any(axis=1)

    if only_indexes:
        return bad[bad.values].index.values
    else:
        return bad


remove_baseline_flag = True

for rat, dset in product(SHORTCUTS['groups']['eletro'], DSETS):
    print(rat, dset)
    data = select(io.load(rat, dset),
                  _min_duration=1.5,
                  _max_duration=4.5,
                  is_tired=False)
    data = data[~data.reset_index().trial.isin(bad_trials(rat)).values]
    if rat in SHORTCUTS['groups']['DRRD']:
        data = select(data, is_selected=True)
    else:
        data = select(data, _min_quality=0)
    data = to_feature_array(data, Xyt=False, subset='full')
    if remove_baseline_flag:
        data = remove_baseline(data, io.load(rat, 'baseline'), .5)
    X, y, trial = (data.values,
                   data.reset_index().time,
                   data.reset_index().trial)
    uniquetrials = np.unique(trial)
    for i, tr_ in enumerate(uniquetrials[30:]):
Example #14
TMIN = 1.5
folder = 'data/results/across_trials/edge_crop_decoding/'
if not os.path.exists(folder):
    os.makedirs(folder)

DSETS = ['narrow_smoothed', 'narrow_smoothed_norm']
# Alternatives: ['medium_smoothed', 'medium_smoothed_norm',
#                'narrow_smoothed', 'narrow_smoothed_norm', 'wide_smoothed']
NSPLITS = 30
subset = 'cropped'

clf = LogisticRegression()
for label, dset in product(SHORTCUTS['groups']['DRRD'], DSETS):

    data = select(io.load(label, dset),
                  _min_duration=1.5,
                  is_selected=True,
                  is_tired=False)
    data = to_feature_array(data)

    times = data.reset_index().time.unique()

    res = []
    for crop in range((len(times) - 1) // 2):
        if crop > 0:
            to_use_times = times[crop:-crop]
        else:
            to_use_times = times
        df = select(data.reset_index(),
                    time_in_=to_use_times).set_index(['trial', 'time'])
Example #15
import os

from spikelearn.data import io, to_feature_array, select, SHORTCUTS

# Directory
savedir = 'data/results/duration/d_prime2'
if not os.path.exists(savedir):
    os.makedirs(savedir)

# Parameters
T_short_MAX, T_long_MIN = 1.5, 1.5

for label in SHORTCUTS['groups']['DRRD']:

    # Load necessary data
    data = io.load(label, 'epoched_spikes')
    data = select(data, is_tired=False)  # Remove unwanted trials
    data = select(data, is_selected=True)  # Remove unwanted units

    # Separate trials long and short
    data['is_short'] = (data.duration < T_short_MAX)
    data['is_long'] = (data.duration > T_long_MIN)
    data = data[data.is_short | data.is_long].reset_index()

    # Separate along session moment
    data['is_init'] = data.trial < data.trial.quantile(.5)

    # Number of spikes during baseline
    data['baseline'] = data.baseline.apply(len)

    # Calculate D'
Example #16
DSET_PARAMS = {  # earlier entries omitted in this excerpt
    'narrow_smoothed_norm': {
        'sigma': 20,
        'bin_size': 50
    },
    'narrow_smoothed_norm_viz': {
        'sigma': 20,
        'bin_size': 10
    },
    'no_smoothing_norm': {
        'sigma': None,
        'bin_size': 100
    }
}

for rat_label in SHORTCUTS['groups']['eletro']:  # alternatively: all of SHORTCUTS
    epoched = io.load(rat_label, 'epoched_spikes')

    for dset_name, params in DSET_PARAMS.items():
        # Create dataset and add identifiers
        smoothed_dataset = pd.DataFrame(index=epoched.index)

        if 'norm' in dset_name:
            cnames = ['normalized_time', 'normalized_without_edges']
            edges_for_each = [lambda x: (0, 1000), lambda x: (0, 1000)]
        else:
            cnames = ['with_baseline', 'time']
            edges_for_each = [
                lambda x: (BASELINE, 1000 * x.duration),
                lambda x: (MA_CUT[0], 1000 * x.duration - MA_CUT[1])
            ]