""" import pandas as pd import numpy as np import seaborn as sns from os.path import join import matplotlib.pyplot as plt from paper_behavior_functions import seaborn_style, figpath, load_csv, FIGURE_WIDTH, FIGURE_HEIGHT # Settings FIG_PATH = figpath() colors = [[1, 1, 1], [1, 1, 1], [0.6, 0.6, 0.6]] seaborn_style() # Load in results from csv file decoding_result = load_csv('classification_results', 'classification_results_full_bayes.pkl') # Calculate if decoder performs above chance chance_level = decoding_result['original_shuffled'].mean() significance = np.percentile(decoding_result['original'], 2.5) sig_control = np.percentile(decoding_result['control'], 0.001) if chance_level > significance: print('Classification performance not significanlty above chance') else: print('Above chance classification performance!') # %% f, ax1 = plt.subplots(1, 1, figsize=(FIGURE_WIDTH/5, FIGURE_HEIGHT)) sns.violinplot(data=pd.concat([decoding_result['control'], decoding_result['original_shuffled'],
trials.fetch(format='frame').join( subject_info.fetch(format='frame')).sort_values(by=[ 'institution_short', 'subject_nickname', 'session_start_time', 'trial_id' ]).reset_index()) behav['institution_code'] = behav.institution_short.map(institution_map) # split the two types of task protocols (remove the pybpod version number) behav['task'] = behav['task_protocol'].str[14:20].copy() # RECODE SOME THINGS JUST FOR PATSY behav['contrast'] = np.abs(behav.signed_contrast) behav['stimulus_side'] = np.sign(behav.signed_contrast) behav['block_id'] = behav['probabilityLeft'].map({80: -1, 50: 0, 20: 1}) else: # load from disk behav = load_csv('Fig5.csv') # ========================================== # #%% 2. DEFINE THE GLM # ========================================== # # DEFINE THE MODEL def fit_glm(behav, prior_blocks=False, folds=5): # drop trials with contrast-level 50, only rarely present (should not be its own regressor) behav = behav[np.abs(behav.signed_contrast) != 50] # use patsy to easily build design matrix if not prior_blocks: endog, exog = patsy.dmatrices(
from ibl_pipeline import reference, subject, behavior use_sessions, _ = query_sessions_around_criterion(criterion='biased', days_from_criterion=[1, 3]) use_sessions = use_sessions & 'task_protocol LIKE "%biased%"' # only get biased sessions b = (use_sessions * subject.Subject * subject.SubjectLab * reference.Lab * behavior.TrialSet.Trial) b2 = b.proj('institution_short', 'subject_nickname', 'task_protocol', 'trial_stim_contrast_left', 'trial_stim_contrast_right', 'trial_response_choice', 'task_protocol', 'trial_stim_prob_left', 'trial_feedback_type', 'trial_response_time', 'trial_stim_on_time', 'time_zone') bdat = b2.fetch(order_by='institution_short, subject_nickname, session_start_time, trial_id', format='frame').reset_index() behav = dj2pandas(bdat) behav['institution_code'] = behav.institution_short.map(institution_map()[0]) else: behav = load_csv('Fig4.csv') biased_fits = pd.DataFrame() for i, nickname in enumerate(behav['subject_nickname'].unique()): if np.mod(i+1, 10) == 0: print('Processing data of subject %d of %d' % (i+1, len(behav['subject_nickname'].unique()))) # Get lab and timezone lab = behav.loc[behav['subject_nickname'] == nickname, 'institution_code'].unique()[0] time_zone = behav.loc[behav['subject_nickname'] == nickname, 'time_zone'].unique()[0] if (time_zone == 'Europe/Lisbon') or (time_zone == 'Europe/London'): time_zone_number = 0 elif time_zone == 'America/New_York': time_zone_number = -5 elif time_zone == 'America/Los_Angeles':
behavior_analysis.SessionTrainingStatus - subject.Death & 'training_status = "in_training"' & 'session_start_time > "%s"' % CUTOFF_DATE) use_subjects = mice_started_training - still_training # Get training status and training time in number of sessions and trials ses = ((use_subjects * behavior_analysis.SessionTrainingStatus * behavior_analysis.PsychResults).proj( 'subject_nickname', 'training_status', 'n_trials_stim', 'institution_short').fetch(format='frame').reset_index()) ses['n_trials'] = [sum(i) for i in ses['n_trials_stim']] ses = ses.drop('n_trials_stim', axis=1).dropna() ses = ses.sort_values(['subject_nickname', 'session_start_time']) else: # Load in sessions from csv file ses = load_csv('Fig2d.csv').dropna() # Select mice that started training before cut off date ses = ses.groupby('subject_uuid').filter( lambda s: s['session_start_time'].min() < CUTOFF_DATE) # Construct dataframe from query training_time = pd.DataFrame() for i, nickname in enumerate(ses['subject_nickname'].unique()): training_time.loc[i, 'nickname'] = nickname training_time.loc[i, 'lab'] = ses.loc[ses['subject_nickname'] == nickname, 'institution_short'].values[0] training_time.loc[i, 'sessions'] = sum( (ses['subject_nickname'] == nickname) & ((ses['training_status'] == 'in_training') | (ses['training_status'] == 'untrainable')))
import numpy as np
import seaborn as sns
from os.path import join
import matplotlib.pyplot as plt
from paper_behavior_functions import (seaborn_style, figpath, load_csv,
                                      FIGURE_WIDTH, FIGURE_HEIGHT)

# Settings
FIG_PATH = figpath()
colors = [[1, 1, 1], [1, 1, 1], [0.6, 0.6, 0.6]]
seaborn_style()

# For every decoder type: load its stored classification results and report
# whether it classified above chance, together with its mean F1 score.
for DECODER in ['bayes', 'forest', 'regression']:

    # Load the stored results for this decoder (a pickle file, despite the
    # helper being called load_csv)
    filename = f'classification_results_basic_{DECODER}.pkl'
    decoding_result = load_csv('classification_results', filename)

    # Chance level is the mean of the 'original_shuffled' scores. The decoder
    # counts as above chance when that mean does not exceed the 2.5th
    # percentile of the 'original' scores.
    chance_level = decoding_result['original_shuffled'].mean()
    significance = np.percentile(decoding_result['original'], 2.5)
    # NOTE(review): sig_control is not used in this part of the script;
    # kept in case later cells rely on it - confirm.
    sig_control = np.percentile(decoding_result['control'], 0.001)

    if chance_level > significance:
        print('\n%s classifier did not perform above chance' % DECODER)
    else:
        # Previously this branch printed the same "did not perform" message
        # as the branch above, so successful decoders were misreported.
        print('\n%s classifier performed above chance' % DECODER)
    print('Chance level: %.2f (F1 score)' % chance_level)
    print('F1 score: %.2f ± %.3f' % (decoding_result['original'].mean(),
                                     decoding_result['original'].std()))

    # %%
subject.Subject * subject.SubjectLab * reference.Lab * (behavior.TrialSet.Trial & session_keys)) ses = ses.proj( 'institution_short', 'subject_nickname', 'task_protocol', 'trial_stim_contrast_left', 'trial_stim_contrast_right', 'trial_response_choice', 'task_protocol', 'trial_stim_prob_left', 'trial_feedback_type', 'trial_response_time', 'trial_stim_on_time', 'time_zone').fetch( order_by= 'institution_short, subject_nickname,session_start_time, trial_id', format='frame').reset_index() behav = dj2pandas(ses) behav['institution_code'] = behav.institution_short.map( institution_map()[0]) else: behav = load_csv('Fig3.csv') # Create dataframe with behavioral metrics of all mice learned = pd.DataFrame(columns=[ 'mouse', 'lab', 'perf_easy', 'n_trials', 'threshold', 'bias', 'reaction_time', 'lapse_low', 'lapse_high', 'time_zone', 'UTC' ]) for i, nickname in enumerate(behav['subject_nickname'].unique()): if np.mod(i + 1, 10) == 0: print('Processing data of subject %d of %d' % (i + 1, len(behav['subject_nickname'].unique()))) # Get the trials of the sessions around criterion for this subject trials = behav[behav['subject_nickname'] == nickname] trials = trials.reset_index()
FIGURE_WIDTH, FIGURE_HEIGHT, fit_psychfunc, num_star, query_session_around_performance) import scikit_posthocs as sp from statsmodels.stats.multitest import multipletests seaborn_style() figpath = figpath() pal = group_colors() institution_map, col_names = institution_map() col_names = col_names[:-1] if QUERY == True: behav = query_session_around_performance(perform_thres=0.8) else: behav = load_csv('suppfig_3-4af.pkl') behav['institution_code'] = behav.lab_name.map(institution_map) # Create dataframe with behavioral metrics of all mice learned = pd.DataFrame(columns=[ 'mouse', 'institution_short', 'perf_easy', 'n_trials', 'threshold', 'bias', 'reaction_time', 'lapse_low', 'lapse_high', 'trials_per_minute' ]) for i, nickname in enumerate(behav['subject_nickname'].unique()): if np.mod(i + 1, 10) == 0: print('Processing data of subject %d of %d' % (i + 1, len(behav['subject_nickname'].unique()))) # Get the trials of the sessions around criterion for this subject (first # 90% + next session)
'lapse_high') query = (not_trained.aggr(status, session_start_time='max(session_start_time)', session_n='COUNT(session_start_time)') * status * subject.Death.proj('death_ts') * behavior_analysis.PsychResults.proj(*fields) * behavior.TrialSet.proj('n_trials')) df = ((query.fetch(format='frame').reset_index().drop('subject_project', axis=1))) # Print a breakdown of final training statuses print(df.training_status.value_counts(), '\n') # Load the cull reasons from file. These were not available through DJ. df.subject_uuid = df.subject_uuid.astype(str) cull_reasons = load_csv('cull_reasons.csv') df = pd.merge(df, cull_reasons, on='subject_uuid') # NB: Untrainable training status takes precedence over cull reason not_trained = len(mice_started_training) - len(trained) untrainable = df['training_status'] == 'untrainable' time_limit = (df.cull_reason == 'time limit reached') & ~untrainable low_trial_n = df['n_trials'] < 400 biased = df['bias'].abs() > 15 low_perf = df['performance_easy'] < 65 # Inspecting deaths injury = ('acute injury', 'infection or illness', 'issue during surgery') premature_death = ~untrainable & (df.cull_reason != 'time limit reached') sick = df.training_status[df.cull_reason.isin(injury)][premature_death] benign = premature_death & (df.cull_reason == 'benign experimental impediments')
pal = group_colors()
# NOTE: this rebinds the name `institution_map` from the helper function to
# the mapping it returns; the function cannot be called again after this line.
institution_map, col_names = institution_map()
col_names = col_names[:-1]  # drop the last column name

# %% ============================== #
# GET DATA FROM TRAINED ANIMALS
# ================================= #

if QUERY is True:
    # Fetch the per-date behavioral summary (joined with its psychometric
    # results) for the selected subjects from the database
    use_subjects = query_subjects()
    b = (behavioral_analyses.BehavioralSummaryByDate * use_subjects
         * behavioral_analyses.BehavioralSummaryByDate.PsychResults)
    behav = b.fetch(
        order_by='institution_short, subject_nickname, training_day',
        format='frame').reset_index()
    behav['institution_code'] = behav.institution_short.map(institution_map)
else:
    # Load the previously saved data from disk
    behav = load_csv('Fig2af.pkl')

# keep only sessions with more than 100 trials; take an explicit copy so the
# .loc assignments below write to an independent frame instead of triggering
# pandas' chained-assignment warning
behav = behav[behav['n_trials_date'] > 100].copy()

# blank out threshold and bias for sessions whose 'signed_contrasts' entry has
# fewer than 6 elements
# NOTE(review): presumably 6 signed values = 3 contrast levels per side - confirm
too_few_contrasts = behav['signed_contrasts'].str.len() < 6
behav.loc[too_few_contrasts, 'threshold'] = np.nan
behav.loc[too_few_contrasts, 'bias'] = np.nan

# smooth each subject's performance with a 3-point median filter
for i, nickname in enumerate(behav['subject_nickname'].unique()):
    subject_rows = behav['subject_nickname'] == nickname

    # 1.Performance
    perf = behav.loc[subject_rows, 'performance_easy'].values
    perf_conv = medfilt(perf, kernel_size=3)
    behav.loc[subject_rows, 'performance_easy'] = perf_conv
group_colors, figpath, load_csv, FIGURE_WIDTH, FIGURE_HEIGHT, num_star) # Load some things from paper_behavior_functions figpath = Path(figpath()) seaborn_style() institution_map, col_names = institution_map() pal = group_colors() cmap = sns.diverging_palette(20, 220, n=3, center="dark") # ========================================== # #%% 1. GET GLM FITS FOR ALL MICE # ========================================== # print('loading model from disk...') params_basic = load_csv('model_results', 'params_basic.csv') params_full = load_csv('model_results', 'params_full.csv') combined = params_basic.merge(params_full, on=['institution_code', 'subject_nickname']) # ========================================== # # PRINT SUMMARY AND STATS # ========================================== # vars = ['6.25', '12.5', '25', '100', 'rewarded', 'unrewarded', 'bias'] for v in vars: print('basic task, %s: mean %.2f, %f : %f' % (v, params_basic[v].mean(), params_basic[v].min(), params_basic[v].max())) print(
# Construct dataframe training_time = pd.DataFrame(columns=['sessions'], data=ses.groupby('subject_nickname').size()) ses['n_trials_date'] = ses['n_trials_date'].astype(int) training_time['trials'] = ses.groupby( 'subject_nickname').sum()['n_trials_date'] training_time['lab'] = ses.groupby( 'subject_nickname')['institution_short'].apply(list).str[0] # Change lab name into lab number training_time['lab_number'] = training_time.lab.map(institution_map) training_time = training_time.sort_values('lab_number') training_time = training_time.reset_index() else: data = load_csv('Fig2af.pkl').dropna() use_subjects = data['subject_nickname'].unique( ) # For counting the number of subjects training_time = pd.DataFrame() for i, subject in enumerate(use_subjects): training_time = training_time.append( pd.DataFrame( index=[training_time.shape[0] + 1], data={ 'subject_nickname': subject, 'lab': data.loc[data['subject_nickname'] == subject, 'institution_short'].unique(), 'sessions': data.loc[((data['subject_nickname'] == subject)
ses = ((use_sessions & 'task_protocol LIKE "%training%"') * subject.Subject * subject.SubjectLab * reference.Lab * (behavior.TrialSet.Trial & session_keys)) ses = ses.proj( 'institution_short', 'subject_nickname', 'task_protocol', 'session_uuid', 'trial_stim_contrast_left', 'trial_stim_contrast_right', 'trial_response_choice', 'task_protocol', 'trial_stim_prob_left', 'trial_feedback_type', 'trial_response_time', 'trial_stim_on_time', 'session_end_time').fetch( order_by= 'institution_short, subject_nickname,session_start_time, trial_id', format='frame').reset_index() behav = dj2pandas(ses) behav['institution_code'] = behav.institution_short.map(institution_map) else: behav = load_csv('Fig3.csv', parse_dates=['session_start_time', 'session_end_time']) # Create dataframe with behavioral metrics of all mice learned = pd.DataFrame(columns=[ 'mouse', 'lab', 'perf_easy', 'n_trials', 'threshold', 'bias', 'reaction_time', 'lapse_low', 'lapse_high', 'trials_per_minute' ]) for i, nickname in enumerate(behav['subject_nickname'].unique()): if np.mod(i + 1, 10) == 0: print('Processing data of subject %d of %d' % (i + 1, len(behav['subject_nickname'].unique()))) # Get the trials of the sessions around criterion for this subject trials = behav[behav['subject_nickname'] == nickname] trials = trials.reset_index()