def main(save_dir='/Users/rkp/Desktop'):
    """Run the binary-classifier analysis and save one accuracy figure per method.

    For each condition ('experimental', 'control') two trajectory pools are
    fetched with ``igfh.get_trajs_with_integrated_odor_above_threshold``. The
    pool stored under 'on' is always queried with odor state 'on'. The pool
    stored under 'none' is queried with odor state 'none' in the experimental
    condition and with odor state 'on' in the control condition, so that in
    the control both classes come from the same odor state.

    For every trial the pools are reshuffled, split into N_TRAIN training and
    N_TEST test trajectories, and each classifier named in METHODS is trained
    and scored on both sets. Accuracies (in percent) are collected per
    condition and method and plotted as histograms.

    Args:
        save_dir: Directory the figures are written to. Defaults to the
            location that used to be hard-coded.

    Raises:
        ValueError: If METHODS names a method with no known classifier.
        AssertionError: If a pool holds fewer than N_TRAIN + N_TEST
            trajectories.
    """
    # Lazy factories so a classifier class is only looked up when it is used.
    classifier_factories = {
        'var': lambda: tsc.VarClassifierBinary(dim=3, order=2),
        'mean_speed': lambda: tsc.MeanSpeedClassifierBinary(),
        'mean_heading': lambda: tsc.MeanHeadingClassifierBinary(),
        'std_heading': lambda: tsc.StdHeadingClassifierBinary(),
    }
    # Fail before any data is fetched; previously an unknown method silently
    # reused the classifier left over from the preceding loop iteration.
    unknown_methods = [m for m in METHODS if m not in classifier_factories]
    if unknown_methods:
        raise ValueError(
            'No classifier defined for method(s): {}'.format(unknown_methods))

    # Odor state used to fill the 'none' pool in each condition.
    negative_pool_state = {'experimental': 'none', 'control': 'on'}
    n_needed = N_TRAIN + N_TEST

    # Labels are identical for every trial and method: positives first.
    train_ground_truth = np.concatenate([[1] * N_TRAIN, [-1] * N_TRAIN])
    test_ground_truth = np.concatenate([[1] * N_TEST, [-1] * N_TEST])

    train_prediction_accuracy = {}
    test_prediction_accuracy = {}

    for condition in ['experimental', 'control']:
        print('Condition: {}'.format(condition))
        train_prediction_accuracy[condition] = {
            method: [] for method in METHODS
        }
        test_prediction_accuracy[condition] = {
            method: [] for method in METHODS
        }

        trajs = {
            'on': igfh.get_trajs_with_integrated_odor_above_threshold(
                EXPERIMENT_ID,
                'on',
                integrated_odor_threshold=INTEGRATED_ODOR_THRESHOLDS['on'],
                max_trajs=n_needed,
            ),
            'none': igfh.get_trajs_with_integrated_odor_above_threshold(
                EXPERIMENT_ID,
                negative_pool_state[condition],
                integrated_odor_threshold=INTEGRATED_ODOR_THRESHOLDS['none'],
                max_trajs=n_needed,
            ),
        }

        print('{} trajectories with odor on'.format(len(trajs['on'])))
        print('{} trajectories with odor off'.format(len(trajs['none'])))
        assert len(trajs['on']) >= n_needed, \
            'not enough odor-on trajectories'
        assert len(trajs['none']) >= n_needed, \
            'not enough odor-off trajectories'
        print('Sufficient trajectories for classification analysis')

        for tr_ctr in range(N_TRIALS):
            if tr_ctr % 20 == 19:
                print('Trial # {}'.format(tr_ctr + 1))

            # Draw a fresh random train/test split for this trial.
            vels = {}
            for odor_state in ODOR_STATES:
                shuffle(trajs[odor_state])
                vels[odor_state] = {
                    'train': [
                        traj.velocities(session)
                        for traj in trajs[odor_state][:N_TRAIN]
                    ],
                    'test': [
                        traj.velocities(session)
                        for traj in trajs[odor_state][N_TRAIN:n_needed]
                    ],
                }

            train_vels = vels['on']['train'] + vels['none']['train']
            test_vels = vels['on']['test'] + vels['none']['test']

            for method in METHODS:
                clf = classifier_factories[method]()
                clf.train(positives=vels['on']['train'],
                          negatives=vels['none']['train'])

                # Accuracy on the training set.
                train_predictions = np.array(clf.predict(train_vels))
                train_accuracy = 100 * np.mean(
                    train_predictions == train_ground_truth)

                # Shuffle test examples together with their labels. The
                # velocity arrays stay in a plain list (as for the training
                # set) because stacking trajectories of different lengths
                # into one ndarray is rejected by current NumPy.
                rand_idx = np.random.permutation(len(test_vels))
                shuffled_test_vels = [test_vels[idx] for idx in rand_idx]
                shuffled_ground_truth = test_ground_truth[rand_idx]

                test_predictions = np.array(clf.predict(shuffled_test_vels))
                test_accuracy = 100 * np.mean(
                    test_predictions == shuffled_ground_truth)

                # Store values for later plotting.
                train_prediction_accuracy[condition][method].append(
                    train_accuracy)
                test_prediction_accuracy[condition][method].append(
                    test_accuracy)

    # One figure per method: test accuracy on top, training accuracy below.
    for method in METHODS:
        fig, axs = plt.subplots(2, 1, facecolor=FACE_COLOR, figsize=FIG_SIZE,
                                sharex=True, tight_layout=True)

        accuracy_by_row = (test_prediction_accuracy, train_prediction_accuracy)
        for ax, accuracy in zip(axs, accuracy_by_row):
            # `density` replaces the `normed` keyword removed from Matplotlib.
            ax.hist(accuracy['control'][method],
                    density=True, color='b', lw=0)
            ax.hist(accuracy['experimental'][method],
                    density=True, color='g', lw=0)

        axs[0].legend(
            [
                'Training examples from same class',
                'Training examples from different classes',
            ],
            loc='best',
            fontsize=FONT_SIZE,
        )

        axs[0].set_xlabel('Test set prediction accuracy (%)')
        axs[0].set_ylabel('Probability')
        axs[1].set_xlabel('Training set prediction accuracy (%)')
        axs[1].set_ylabel('Probability')
        axs[0].set_title(
            'Experiment: {}\n {} training, {} test\n{} classifier'.format(
                EXPERIMENT_ID, N_TRAIN, N_TEST, method))

        for ax in axs:
            axis_tools.set_fontsize(ax, FONT_SIZE)

        fig.savefig('{}/classifier_{}_method_{}.png'.format(
            save_dir, EXPERIMENT_ID, method))
def main(n_trials, n_train_max, n_test_max, root_dir_env_var):
    """Fit GLM sets on random train/test splits and store each fit set.

    For every experiment in EXPERIMENT_IDS and odor state in ODOR_STATES the
    trajectories above INTEGRATED_ODOR_THRESHOLD are fetched. On each of
    ``n_trials`` trials they are randomly split into disjoint training and
    test sets. One ``fitting.GLMFitter`` per (input set, output, basis) triple
    is fit to the training data and scored on the test data by the
    root-mean-square difference between prediction and ground truth. The
    results of all trials are written through ``models.GlmFitSet`` to a data
    file and a database row.

    Args:
        n_trials: Number of random train/test splits per experiment and
            odor state.
        n_train_max: Upper bound on the number of training trajectories.
        n_test_max: Upper bound on the number of test trajectories.
        root_dir_env_var: Stored on the fit set as ``root_dir_env_var``;
            presumably names the environment variable holding the data root
            directory (confirm against ``models.GlmFitSet``).
    """
    # The third return value (maximum filter length) is not needed here.
    basis_ins, basis_outs, _ = igfh.make_exponential_basis_functions(
        INPUT_TAUS, OUTPUT_TAUS, DOMAIN_FACTOR
    )
    n_max_total = n_train_max + n_test_max

    for expt_id in EXPERIMENT_IDS:
        for odor_state in ODOR_STATES:

            trajs = igfh.get_trajs_with_integrated_odor_above_threshold(
                expt_id, odor_state, INTEGRATED_ODOR_THRESHOLD
            )
            n_trajs = len(trajs)

            # Keep the requested train:test proportion when fewer than
            # n_train_max + n_test_max trajectories are available. Integer
            # arithmetic keeps both counts usable as slice bounds (a float
            # from np.floor is not) and avoids truncating the ratio to zero
            # under Python 2 division.
            n_train = int(min(n_train_max,
                              (n_trajs * n_train_max) // n_max_total))
            n_test = int(min(n_test_max,
                             (n_trajs * n_test_max) // n_max_total))

            trajs_trains = []
            trajs_tests = []
            glmss = []
            residualss = []

            for trial_ctr in range(n_trials):
                print('{}: odor {} (trial number: {})'.format(
                    expt_id, odor_state, trial_ctr))

                # Training indices come from the front of the permutation and
                # test indices from the back. An explicit start index is used
                # because `perm[-0:]` would select everything when n_test
                # is 0.
                perm = np.random.permutation(n_trajs)
                train_idxs = perm[:n_train]
                test_idxs = perm[n_trajs - n_test:]

                trajs_train = [trajs[idx] for idx in train_idxs]
                trajs_test = [trajs[idx] for idx in test_idxs]

                glms = []
                residuals = []
                # basis_outs only takes part in the zip (it bounds the number
                # of models); the output basis itself is disabled below.
                for input_set, output, basis_in, _ in zip(
                        INPUT_SETS, OUTPUTS, basis_ins, basis_outs):

                    # Get relevant time-series data from each trajectory set.
                    data_train = igfh.time_series_from_trajs(
                        trajs_train,
                        inputs=input_set,
                        output=output
                    )
                    data_test = igfh.time_series_from_trajs(
                        trajs_test,
                        inputs=input_set,
                        output=output
                    )

                    glm = fitting.GLMFitter(link=LINK, family=FAMILY)
                    glm.set_params(DELAY, basis_in=basis_in, basis_out=False)
                    glm.input_set = input_set
                    glm.output = output

                    # Fit to training data.
                    glm.fit(data=data_train, start=START_TIMEPOINT)

                    # Predict test data.
                    prediction = glm.predict(
                        data=data_test, start=START_TIMEPOINT)
                    _, ground_truth = \
                        glm.make_feature_matrix_and_response_vector(
                            data_test, START_TIMEPOINT)

                    # Root-mean-square prediction error on the test set.
                    residual = np.sqrt(
                        ((prediction - ground_truth) ** 2).mean())

                    # Clear out feature matrix and response from the glm for
                    # efficient storage.
                    glm.feature_matrix = None
                    glm.response_vector = None
                    glm.results.remove_data()

                    glms.append(glm)
                    residuals.append(residual)

                # Only trajectory ids are stored, not the objects themselves.
                trajs_trains.append([traj.id for traj in trajs_train])
                trajs_tests.append([traj.id for traj in trajs_test])
                glmss.append(glms)
                residualss.append(residuals)

            # Build the fit-set record describing this run.
            glm_fit_set = models.GlmFitSet()

            glm_fit_set.root_dir_env_var = root_dir_env_var
            glm_fit_set.path_relative = 'glm_fit'
            glm_fit_set.file_name = '{}_{}_odor_{}.pickle'.format(
                FIT_NAME, expt_id, odor_state)
            glm_fit_set.experiment = session.query(
                models.Experiment).get(expt_id)
            glm_fit_set.odor_state = odor_state
            glm_fit_set.name = FIT_NAME
            glm_fit_set.link = LINK
            glm_fit_set.family = FAMILY
            glm_fit_set.integrated_odor_threshold = INTEGRATED_ODOR_THRESHOLD
            glm_fit_set.predicted = PREDICTED
            glm_fit_set.delay = DELAY
            glm_fit_set.start_time_point = START_TIMEPOINT
            # Number of models fit in the last trial; 0 when no trial ran
            # (the per-trial list is otherwise unbound in that case).
            glm_fit_set.n_glms = len(glmss[-1]) if glmss else 0
            glm_fit_set.n_train = n_train
            glm_fit_set.n_test = n_test
            glm_fit_set.n_trials = n_trials

            # Save data file.
            glm_fit_set.save_to_file(
                input_sets=INPUT_SETS,
                outputs=OUTPUTS,
                basis_in=basis_ins,
                basis_out=basis_outs,
                trajs_train=trajs_trains,
                trajs_test=trajs_tests,
                glms=glmss,
                residuals=residualss
            )

            # Save everything else (+ link to data file) in the database.
            session.add(glm_fit_set)

    commit(session)
def test_fitting_of_multiple_models_to_single_training_set_and_seeing_how_well_they_predict_test_set(
        self):
    """Fit several GLMs to one training set and plot their test predictions.

    Trajectories above INTEGRATED_ODOR_THRESHOLD are fetched and the first
    N_TRAIN + N_TEST of them are randomly divided into a training and a test
    set. One ``fitting.GLMFitter`` per (input set, output, basis) triple is
    fit to the training data and scored on the test data by root-mean-square
    error. Two figures are saved to SAVE_DIR ('filters.png' with filters and
    bases, 'example_predictions.png' with the first test trajectory's
    prediction against ground truth and odor) and then shown.
    """
    # Make basis sets for each model; the third return value is unused here.
    print('Making filter basis functions...')
    basis_ins, basis_outs, _ = igfh.make_exponential_basis_functions(
        INPUT_TAUS, OUTPUT_TAUS, DOMAIN_FACTOR)
    n_models = len(basis_outs)

    print('Getting trajectories...')
    trajs = igfh.get_trajs_with_integrated_odor_above_threshold(
        experiment_id=EXPT_ID,
        odor_state=ODOR_STATE,
        integrated_odor_threshold=INTEGRATED_ODOR_THRESHOLD)
    # Fail with a clear message instead of an IndexError further down.
    assert len(trajs) >= N_TRAIN + N_TEST, \
        'need at least {} trajectories, got {}'.format(
            N_TRAIN + N_TEST, len(trajs))

    # Split these into training and test trajectories. Plain list indexing
    # avoids wrapping the trajectory objects in an ndarray.
    perm = np.random.permutation(N_TRAIN + N_TEST)
    trajs_train = [trajs[idx] for idx in perm[:N_TRAIN]]
    trajs_test = [trajs[idx] for idx in perm[N_TRAIN:N_TRAIN + N_TEST]]

    # Fit each of N models to training data and predict test data.
    print('Fitting models...')
    fitted_models = []
    residuals = []
    # basis_outs only bounds the zip; the output basis is disabled below.
    for input_set, output, basis_in, _ in zip(
            INPUT_SETS, OUTPUTS, basis_ins, basis_outs):

        # Get relevant time-series data from each trajectory set.
        data_train = igfh.time_series_from_trajs(
            trajs_train, inputs=input_set, output=output)
        data_test = igfh.time_series_from_trajs(
            trajs_test, inputs=input_set, output=output)

        model = fitting.GLMFitter(link=LINK_NAME, family=FAMILY_NAME)
        model.set_params(DELAY, basis_in=basis_in, basis_out=False)
        model.input_set = input_set
        model.output = output

        # Fit to training data.
        model.fit(data=data_train, start=START_TIMEPOINT)

        # Predict test data.
        prediction = model.predict(data=data_test, start=START_TIMEPOINT)
        _, ground_truth = model.make_feature_matrix_and_response_vector(
            data_test, START_TIMEPOINT)

        # Root-mean-square prediction error on the test set.
        residual = np.sqrt(((prediction - ground_truth) ** 2).mean())

        fitted_models.append(model)
        residuals.append(residual)

    print('Generating plots...')
    # squeeze=False keeps both axes grids 2-D, so the row iteration and the
    # [-1] lookups below also work when there is only a single model.
    fig_filt, axs_filt = plt.subplots(
        n_models, 2, facecolor='white', figsize=(10, 10),
        tight_layout=True, squeeze=False)
    fig_ts, axs_ts_grid = plt.subplots(
        n_models, 1, facecolor='white', figsize=(10, 10),
        tight_layout=True, squeeze=False)
    axs_ts = axs_ts_grid[:, 0]

    for model, res, ax_filt_row, ax_ts in zip(
            fitted_models, residuals, axs_filt, axs_ts):

        data_test = igfh.time_series_from_trajs(
            trajs_test, inputs=model.input_set, output=model.output)

        model.plot_filters(ax_filt_row[0], x_lim=(0, 100))
        model.plot_basis(ax_filt_row[1], x_lim=(0, 100))

        # Example prediction for the first test trajectory only.
        prediction_0 = model.predict(
            data=data_test[0:1], start=START_TIMEPOINT)
        _, ground_truth_0 = model.make_feature_matrix_and_response_vector(
            data_test[0:1], START_TIMEPOINT)

        # Time axis covering the last len(prediction_0) samples.
        t = np.arange(len(data_test[0][1]))[-len(prediction_0):]
        ax_ts.plot(t, ground_truth_0, color='k', ls='-')
        ax_ts.plot(t, prediction_0, color='r', ls='--', lw=2)

        # Overlay the odor input of that trajectory on a second y-axis.
        odor = igfh.time_series_from_trajs(
            trajs_test, inputs=('odor', ), output=model.output)[0][0][0]
        ax_ts_odor = ax_ts.twinx()
        t_odor = np.arange(START_TIMEPOINT + DELAY, len(odor))
        ax_ts_odor.plot(t_odor, odor[START_TIMEPOINT + DELAY:],
                        color='b', ls='-')

        ax_filt_row[0].set_ylabel('filter\nstrength')
        ax_ts.set_title('Residual = {}'.format(res))

    axs_filt[-1][0].set_xlabel('timestep')
    axs_filt[-1][1].set_xlabel('timestep')
    axs_ts[-1].set_xlabel('timestep')

    fig_filt.savefig(os.path.join(SAVE_DIR, 'filters.png'))
    fig_ts.savefig(os.path.join(SAVE_DIR, 'example_predictions.png'))

    plt.show()
def test_fitting_of_multiple_models_to_single_training_set_and_seeing_how_well_they_predict_test_set(self):
    """Fit one GLM per configured model and visualise test-set predictions.

    The first N_TRAIN + N_TEST trajectories above INTEGRATED_ODOR_THRESHOLD
    are randomly divided into training and test sets. For every
    (input set, output, basis) triple a ``fitting.GLMFitter`` is fit on the
    training data and its root-mean-square error on the test data is
    recorded. Filters/bases and an example prediction per model are plotted,
    saved under SAVE_DIR as 'filters.png' and 'example_predictions.png', and
    displayed.
    """
    print('Making filter basis functions...')
    # Third return value (maximum filter length) is not used in this test.
    basis_ins, basis_outs, _ = igfh.make_exponential_basis_functions(
        INPUT_TAUS, OUTPUT_TAUS, DOMAIN_FACTOR
    )
    n_models = len(basis_outs)

    print('Getting trajectories...')
    trajs = igfh.get_trajs_with_integrated_odor_above_threshold(
        experiment_id=EXPT_ID,
        odor_state=ODOR_STATE,
        integrated_odor_threshold=INTEGRATED_ODOR_THRESHOLD
    )
    n_needed = N_TRAIN + N_TEST
    # Surface a shortage of data explicitly rather than as an IndexError.
    assert len(trajs) >= n_needed, \
        'need at least {} trajectories, got {}'.format(n_needed, len(trajs))

    # Random train/test split; indexing the list directly avoids building an
    # ndarray out of trajectory objects.
    perm = np.random.permutation(n_needed)
    trajs_train = [trajs[idx] for idx in perm[:N_TRAIN]]
    trajs_test = [trajs[idx] for idx in perm[N_TRAIN:n_needed]]

    print('Fitting models...')
    fitted_models = []
    residuals = []
    # basis_outs takes part in the zip only to bound the number of models;
    # the output basis is switched off in set_params.
    for input_set, output, basis_in, _ in zip(
            INPUT_SETS, OUTPUTS, basis_ins, basis_outs):

        # Time-series data for this model's inputs and output.
        data_train = igfh.time_series_from_trajs(
            trajs_train,
            inputs=input_set,
            output=output
        )
        data_test = igfh.time_series_from_trajs(
            trajs_test,
            inputs=input_set,
            output=output
        )

        model = fitting.GLMFitter(link=LINK_NAME, family=FAMILY_NAME)
        model.set_params(DELAY, basis_in=basis_in, basis_out=False)
        model.input_set = input_set
        model.output = output

        # Fit on the training data, then predict the test data.
        model.fit(data=data_train, start=START_TIMEPOINT)
        prediction = model.predict(data=data_test, start=START_TIMEPOINT)
        _, ground_truth = model.make_feature_matrix_and_response_vector(
            data_test, START_TIMEPOINT)

        # Root-mean-square prediction error on the test set.
        residual = np.sqrt(((prediction - ground_truth) ** 2).mean())

        fitted_models.append(model)
        residuals.append(residual)

    print('Generating plots...')
    # With squeeze=False both grids stay 2-D even for a single model, so the
    # per-row zip and the [-1] lookups below remain valid.
    fig_filt, axs_filt = plt.subplots(
        n_models, 2,
        facecolor='white',
        figsize=(10, 10),
        tight_layout=True,
        squeeze=False
    )
    fig_ts, axs_ts_grid = plt.subplots(
        n_models, 1,
        facecolor='white',
        figsize=(10, 10),
        tight_layout=True,
        squeeze=False
    )
    axs_ts = axs_ts_grid[:, 0]

    for model, res, ax_filt_row, ax_ts in zip(
            fitted_models, residuals, axs_filt, axs_ts):

        data_test = igfh.time_series_from_trajs(
            trajs_test,
            inputs=model.input_set,
            output=model.output
        )

        model.plot_filters(ax_filt_row[0], x_lim=(0, 100))
        model.plot_basis(ax_filt_row[1], x_lim=(0, 100))

        # Only the first test trajectory is shown as an example.
        first_traj_data = data_test[0:1]
        prediction_0 = model.predict(
            data=first_traj_data, start=START_TIMEPOINT)
        _, ground_truth_0 = model.make_feature_matrix_and_response_vector(
            first_traj_data, START_TIMEPOINT)

        # Time axis covering the last len(prediction_0) samples.
        t = np.arange(len(data_test[0][1]))[-len(prediction_0):]

        ax_ts.plot(t, ground_truth_0, color='k', ls='-')
        ax_ts.plot(t, prediction_0, color='r', ls='--', lw=2)

        # Odor input of the same trajectory, drawn on a twin y-axis.
        odor = igfh.time_series_from_trajs(
            trajs_test,
            inputs=('odor',),
            output=model.output
        )[0][0][0]
        ax_ts_odor = ax_ts.twinx()
        t_odor = np.arange(START_TIMEPOINT + DELAY, len(odor))
        ax_ts_odor.plot(t_odor, odor[START_TIMEPOINT + DELAY:],
                        color='b', ls='-')

        ax_filt_row[0].set_ylabel('filter\nstrength')
        ax_ts.set_title('Residual = {}'.format(res))

    axs_filt[-1][0].set_xlabel('timestep')
    axs_filt[-1][1].set_xlabel('timestep')
    axs_ts[-1].set_xlabel('timestep')

    fig_filt.savefig(os.path.join(SAVE_DIR, 'filters.png'))
    fig_ts.savefig(os.path.join(SAVE_DIR, 'example_predictions.png'))

    plt.show()
def main(save_dir='/Users/rkp/Desktop'):
    """Evaluate each classifier in METHODS and save accuracy histograms.

    Two conditions are run. In 'experimental' the positive pool is fetched
    with odor state 'on' and the negative pool with odor state 'none'. In
    'control' both pools are fetched with odor state 'on' (each with its own
    entry of INTEGRATED_ODOR_THRESHOLDS), so both classes come from the same
    odor state. On each of N_TRIALS trials the pools are reshuffled, split
    into N_TRAIN training and N_TEST test trajectories, and every classifier
    is trained and scored (percent correct) on both sets. One figure per
    method is written to ``save_dir``.

    Args:
        save_dir: Output directory for the figures. The default is the path
            that was previously hard-coded.

    Raises:
        ValueError: If METHODS contains a name without a known classifier.
        AssertionError: If either pool has fewer than N_TRAIN + N_TEST
            trajectories.
    """
    # Constructors are wrapped in lambdas so nothing on `tsc` is touched
    # until a method is actually requested.
    classifier_factories = {
        'var': lambda: tsc.VarClassifierBinary(dim=3, order=2),
        'mean_speed': lambda: tsc.MeanSpeedClassifierBinary(),
        'mean_heading': lambda: tsc.MeanHeadingClassifierBinary(),
        'std_heading': lambda: tsc.StdHeadingClassifierBinary(),
    }
    # An unrecognised method used to fall through the if/elif chain and reuse
    # the previous iteration's classifier; reject it before doing any work.
    unknown_methods = [m for m in METHODS if m not in classifier_factories]
    if unknown_methods:
        raise ValueError(
            'No classifier defined for method(s): {}'.format(unknown_methods))

    # Odor state queried for the pool stored under 'none', per condition.
    negative_pool_state = {'experimental': 'none', 'control': 'on'}
    n_needed = N_TRAIN + N_TEST

    # Label vectors never change between trials: positives, then negatives.
    train_ground_truth = np.concatenate([[1] * N_TRAIN, [-1] * N_TRAIN])
    test_ground_truth = np.concatenate([[1] * N_TEST, [-1] * N_TEST])

    train_prediction_accuracy = {}
    test_prediction_accuracy = {}

    for condition in ['experimental', 'control']:
        print('Condition: {}'.format(condition))
        train_prediction_accuracy[condition] = {method: [] for method in METHODS}
        test_prediction_accuracy[condition] = {method: [] for method in METHODS}

        trajs = {
            'on': igfh.get_trajs_with_integrated_odor_above_threshold(
                EXPERIMENT_ID, 'on',
                integrated_odor_threshold=INTEGRATED_ODOR_THRESHOLDS['on'],
                max_trajs=n_needed,
            ),
            'none': igfh.get_trajs_with_integrated_odor_above_threshold(
                EXPERIMENT_ID, negative_pool_state[condition],
                integrated_odor_threshold=INTEGRATED_ODOR_THRESHOLDS['none'],
                max_trajs=n_needed,
            ),
        }

        print('{} trajectories with odor on'.format(len(trajs['on'])))
        print('{} trajectories with odor off'.format(len(trajs['none'])))

        assert len(trajs['on']) >= n_needed, 'not enough odor-on trajectories'
        assert len(trajs['none']) >= n_needed, 'not enough odor-off trajectories'

        print('Sufficient trajectories for classification analysis')

        for tr_ctr in range(N_TRIALS):
            if tr_ctr % 20 == 19:
                print('Trial # {}'.format(tr_ctr + 1))

            # Reshuffle each pool and take a fresh train/test split.
            vels = {}
            for odor_state in ODOR_STATES:
                shuffle(trajs[odor_state])
                vels[odor_state] = {
                    'train': [traj.velocities(session)
                              for traj in trajs[odor_state][:N_TRAIN]],
                    'test': [traj.velocities(session)
                             for traj in trajs[odor_state][N_TRAIN:n_needed]],
                }

            train_vels = vels['on']['train'] + vels['none']['train']
            test_vels = vels['on']['test'] + vels['none']['test']

            # Loop through all classifiers.
            for method in METHODS:
                clf = classifier_factories[method]()
                clf.train(positives=vels['on']['train'],
                          negatives=vels['none']['train'])

                # Predictions on the training set.
                train_predictions = np.array(clf.predict(train_vels))
                train_accuracy = 100 * np.mean(
                    train_predictions == train_ground_truth)

                # Shuffle test trajectories and labels with one permutation.
                # The trajectories are kept as a list, as for the training
                # set: current NumPy refuses to build an ndarray from
                # velocity arrays of differing lengths.
                rand_idx = np.random.permutation(len(test_vels))
                shuffled_test_vels = [test_vels[idx] for idx in rand_idx]
                shuffled_ground_truth = test_ground_truth[rand_idx]

                test_predictions = np.array(clf.predict(shuffled_test_vels))
                test_accuracy = 100 * np.mean(
                    test_predictions == shuffled_ground_truth)

                # Store values for later plotting.
                train_prediction_accuracy[condition][method].append(train_accuracy)
                test_prediction_accuracy[condition][method].append(test_accuracy)

    # Make plots: top panel shows test accuracy, bottom panel training
    # accuracy; control is drawn in blue, experimental in green.
    for method in METHODS:
        fig, axs = plt.subplots(
            2, 1, facecolor=FACE_COLOR, figsize=FIG_SIZE,
            sharex=True, tight_layout=True)

        panels = zip(axs, (test_prediction_accuracy, train_prediction_accuracy))
        for ax, accuracy in panels:
            # `normed` was removed from Matplotlib; `density` is its
            # replacement.
            ax.hist(accuracy['control'][method], density=True, color='b', lw=0)
            ax.hist(accuracy['experimental'][method], density=True, color='g', lw=0)

        axs[0].legend(
            ['Training examples from same class',
             'Training examples from different classes'],
            loc='best',
            fontsize=FONT_SIZE,
        )

        axs[0].set_xlabel('Test set prediction accuracy (%)')
        axs[0].set_ylabel('Probability')

        axs[1].set_xlabel('Training set prediction accuracy (%)')
        axs[1].set_ylabel('Probability')

        axs[0].set_title(
            'Experiment: {}\n {} training, {} test\n{} classifier'.format(
                EXPERIMENT_ID, N_TRAIN, N_TEST, method))

        for ax in axs:
            axis_tools.set_fontsize(ax, FONT_SIZE)

        fig.savefig('{}/classifier_{}_method_{}.png'.format(
            save_dir, EXPERIMENT_ID, method))