def test_distance_rank_window():
    """Test distance rank window analysis."""
    # pairwise distances between the eight study items
    dist = np.array(
        [
            [0, 3, 1, 1, 2, 2, 2, 2],
            [3, 0, 1, 1, 2, 2, 2, 2],
            [1, 1, 0, 4, 2, 2, 2, 2],
            [1, 1, 4, 0, 2, 2, 2, 2],
            [2, 2, 2, 2, 0, 5, 1, 1],
            [2, 2, 2, 2, 5, 0, 1, 1],
            [2, 2, 2, 2, 1, 1, 0, 6],
            [2, 2, 2, 2, 1, 1, 6, 0],
        ]
    )
    study_lists = [
        ['absence', 'hollow', 'pupil', 'fountain', 'piano', 'pillow', 'cat', 'tree']
    ]
    recall_lists = [
        ['fountain', 'hollow', 'absence', 'cat', 'piano', 'pupil', 'fountain']
    ]
    indices = ([[0, 1, 2, 3, 4, 5, 6, 7]], [[3, 1, 0, 6, 4, 2, 3]])
    raw = fr.table_from_lists(
        [1], study_lists, recall_lists, item_index=indices
    )
    data = fr.merge_free_recall(raw, list_keys=['item_index'])
    stat = fr.distance_rank_window(data, 'item_index', dist, [-1, 0, 1])
    # per-transition ranks for each window position, averaged over rows
    expected = np.array([[0.875, 0.875, 0.375], [0, 1, 1], [0, 0, 0]])
    np.testing.assert_allclose(expected.mean(0), stat['rank'].to_numpy())
def test_category_clustering():
    """Test category clustering statistics."""
    n_lists = 2
    subject = [1] * n_lists
    # category of study and list items (two cases from category
    # clustering tests)
    study_category = [list('abcd') * 4] * n_lists
    recall_category = [list('aaabbbcccddd'), list('aabbcdcd')]
    # unique item codes (needed for merging study and recall events;
    # not actually needed for the stats)
    study_item = [list(range(len(c))) for c in study_category]
    recall_item = [
        [0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11],
        [0, 4, 1, 5, 2, 3, 6, 7],
    ]
    # create merged free recall data
    raw = fr.table_from_lists(
        subject,
        study_item,
        recall_item,
        category=(study_category, recall_category),
    )
    data = fr.merge_free_recall(raw, list_keys=['category'])
    # test ARC and LBC stats
    stats = fr.category_clustering(data, 'category')
    np.testing.assert_allclose(stats.loc[1, 'arc'], 0.667, rtol=0.011)
    np.testing.assert_allclose(stats.loc[1, 'lbc'], 3.2, rtol=0.011)
def test_distance_rank_shifted():
    """Test shifted distance rank analysis."""
    # pairwise distances between the eight study items
    dist = np.array(
        [
            [0, 1, 1, 1, 2, 2, 2, 2],
            [1, 0, 1, 4, 2, 2, 2, 2],
            [1, 1, 0, 1, 2, 2, 2, 2],
            [1, 4, 1, 0, 2, 2, 2, 2],
            [2, 2, 2, 2, 0, 3, 3, 3],
            [2, 2, 2, 2, 3, 0, 3, 3],
            [2, 2, 2, 2, 3, 3, 0, 3],
            [2, 2, 2, 2, 3, 3, 3, 0],
        ]
    )
    study_lists = [
        ['absence', 'hollow', 'pupil', 'fountain', 'piano', 'pillow', 'cat', 'tree']
    ]
    recall_lists = [
        ['piano', 'fountain', 'hollow', 'tree', 'fountain', 'absence', 'cat', 'pupil']
    ]
    indices = ([[0, 1, 2, 3, 4, 5, 6, 7]], [[4, 3, 1, 7, 3, 0, 6, 2]])
    raw = fr.table_from_lists(
        [1], study_lists, recall_lists, item_index=indices
    )
    data = fr.merge_free_recall(raw, list_keys=['item_index'])
    stat = fr.distance_rank_shifted(data, 'item_index', dist, 2)
    expected = np.array([0.683333, 0.416667])
    np.testing.assert_allclose(expected, stat['rank'].to_numpy(), atol=1e-6)
def test_lag_crp_compound():
    """Test compound lag-CRP analysis."""
    study_lists = [
        ['absence', 'hollow', 'pupil', 'fountain'],
        ['tree', 'cat', 'house', 'dog'],
    ]
    recall_lists = [
        ['fountain', 'hollow', 'absence'],
        ['mouse', 'cat', 'tree', 'house', 'dog'],
    ]
    raw = fr.table_from_lists([1, 1], study_lists, recall_lists)
    data = fr.merge_free_recall(raw)
    crp = fr.lag_crp_compound(data)
    # previous-transition lags: -2, -1 (list 1); NaN, -1, +2, +1 (list 2)
    actual = np.hstack(
        [
            [0, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 0, 0],
        ]
    )
    possible = np.hstack(
        [
            [0, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 1, 1],
            [0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 0, 0],
        ]
    )
    np.testing.assert_array_equal(actual, crp['actual'].to_numpy())
    np.testing.assert_array_equal(possible, crp['possible'].to_numpy())
def setup(self):
    """Load the Morton 2013 dataset and merge study and recall events."""
    # NOTE(review): resource_filename (pkg_resources) is deprecated in
    # favor of importlib.resources; left as-is to avoid changing the
    # surrounding imports.
    data_file = resource_filename('psifr', 'data/Morton2013.csv')
    self.raw = pd.read_csv(data_file, dtype={'category': 'category'})
    # Categorical.as_ordered(inplace=True) was deprecated and removed in
    # pandas 2.0; assign the ordered categorical back instead.
    self.raw['category'] = self.raw['category'].cat.as_ordered()
    self.data = fr.merge_free_recall(
        self.raw,
        list_keys=['list_type', 'list_category'],
        study_keys=['category'],
    )
def split_data(data):
    """Data split into study and recall."""
    merged = fr.merge_free_recall(data, study_keys=['distract'])
    return {
        'study': fr.split_lists(merged, 'study', ['input', 'distract']),
        'recall': fr.split_lists(merged, 'recall', ['input']),
    }
def test_pli_list_lag():
    """Test proportion of list lags for prior-list intrusions."""
    # both subjects study the same four lists; subject 1 makes
    # prior-list intrusions, subject 2 recalls perfectly
    study_lists = [
        ['tree', 'cat'],
        ['absence', 'hollow'],
        ['fountain', 'piano'],
        ['pillow', 'pupil'],
    ]
    recall_subj1 = [
        ['tree', 'cat'],
        ['hollow', 'absence'],
        ['fountain', 'hollow'],
        ['absence', 'piano', 'cat'],
    ]
    recall_subj2 = [
        ['tree', 'cat'],
        ['absence', 'hollow'],
        ['fountain', 'piano'],
        ['pillow', 'pupil'],
    ]
    subjects = [1] * 4 + [2] * 4
    raw = fr.table_from_lists(
        subjects, study_lists * 2, recall_subj1 + recall_subj2
    )
    data = fr.merge_free_recall(raw)

    # max lag 1 (exclude just the first list)
    stat = fr.pli_list_lag(data, max_lag=1)
    np.testing.assert_array_equal(stat['count'].to_numpy(), np.array([2, 0]))
    np.testing.assert_array_equal(
        stat['per_list'].to_numpy(), np.array([2 / 3, 0])
    )
    np.testing.assert_array_equal(
        stat['prob'].to_numpy(), np.array([0.5, np.nan])
    )

    # max lag 2 (exclude first two lists)
    stat = fr.pli_list_lag(data, max_lag=2)
    np.testing.assert_array_equal(
        stat['count'].to_numpy(), np.array([2, 1, 0, 0])
    )
    np.testing.assert_array_equal(
        stat['per_list'].to_numpy(), np.array([1, 0.5, 0, 0])
    )
    np.testing.assert_array_equal(
        stat['prob'].to_numpy(), np.array([0.5, 0.25, np.nan, np.nan])
    )

    # max lag 3 (exclude first three lists)
    stat = fr.pli_list_lag(data, max_lag=3)
    np.testing.assert_array_equal(
        stat['count'].to_numpy(), np.array([1, 1, 1, 0, 0, 0])
    )
    np.testing.assert_array_equal(
        stat['per_list'].to_numpy(), np.array([1, 1, 1, 0, 0, 0])
    )
    np.testing.assert_array_equal(
        stat['prob'].to_numpy(),
        np.array([1 / 3, 1 / 3, 1 / 3, np.nan, np.nan, np.nan]),
    )
def test_pli(raw):
    """Test labeling of prior-list intrusions."""
    data = raw.copy()
    # swap in recalls that intrude across lists
    data.loc[3:5, 'item'] = ['hollow', 'pupil', 'fountain']
    data.loc[9:11, 'item'] = ['pillow', 'fountain', 'pupil']
    merged = fr.merge_free_recall(data)
    for column in ('prior_list', 'prior_input'):
        assert column in merged.columns
    # the PLI (prior-list intrusion) in the second list is labeled with
    # the source list and study position
    pli = merged.query('item == "pupil" and output == 3').reset_index().loc[0]
    assert pli['prior_list'] == 1
    assert pli['prior_input'] == 3
    # the FLI (future-list intrusion) in the first list gets no label
    fli = merged.query('item == "fountain" and output == 3')
    assert np.isnan(fli['prior_list'].to_numpy()[0])
    assert np.isnan(fli['prior_input'].to_numpy()[0])
def data():
    """Merged free recall data for two lists from one subject."""
    raw = pd.DataFrame(
        {
            'subject': [1] * 12,
            'list': [1] * 6 + [2] * 6,
            'trial_type': (['study'] * 3 + ['recall'] * 3) * 2,
            'position': [1, 2, 3] * 4,
            'item': [
                'absence', 'hollow', 'pupil',
                'pupil', 'absence', 'empty',
                'fountain', 'piano', 'pillow',
                'pillow', 'fountain', 'pillow',
            ],
            # NaN marks an intrusion with no matching study item
            'item_index': [0, 1, 2, 1, 2, np.nan, 3, 4, 5, 5, 3, 5],
            'task': [1, 2, 1, 2, 1, np.nan, 1, 2, 1, 1, 1, 1],
        }
    )
    return fr.merge_free_recall(
        raw, study_keys=['task'], list_keys=['item_index']
    )
def prepare_lists(data, study_keys=None, recall_keys=None, clean=True):
    """
    Prepare study and recall data for simulation.

    Return data information split by list. This format is similar to
    frdata structs used in EMBAM.

    Parameters
    ----------
    data : pandas.DataFrame
        Free recall data in Psifr format.

    study_keys : list of str, optional
        Columns to export for study list data. Default is:
        ['input', 'item_index']. Input position is assumed to be
        one-indexed.

    recall_keys : list of str, optional
        Columns to export for recall list data. Default is:
        ['input', 'item_index']. Input position is assumed to be
        one-indexed.

    clean : bool, optional
        If true, repeats and intrusions will be removed.

    Returns
    -------
    study : dict of (str: list of numpy.array)
        Study columns in list format.

    recall : dict of (str: list of numpy.array)
        Recall columns in list format.

    Raises
    ------
    ValueError
        If study list length varies across lists.
    """
    if study_keys is None:
        study_keys = ['input', 'item_index']
    if recall_keys is None:
        recall_keys = ['input', 'item_index']

    # 'input' is produced by the merge itself, so drop it from the merge
    # keys; 'item_index' is not needed as a recall merge key. Filtering
    # (rather than list.remove) avoids a ValueError if a caller omits
    # 'input' from either key list.
    s_keys = [key for key in study_keys if key != 'input']
    r_keys = [key for key in recall_keys if key not in ('input', 'item_index')]
    merged = fr.merge_free_recall(data, study_keys=s_keys, recall_keys=r_keys)
    if clean:
        merged = merged.query('~intrusion and repeat == 0')

    study = fr.split_lists(merged, 'study', study_keys)
    recall = fr.split_lists(merged, 'recall', recall_keys)
    for i in range(len(study['input'])):
        # convert positions to zero-indexed ints; item indices to ints
        if 'input' in study_keys:
            study['input'][i] = study['input'][i].astype(int) - 1
        if 'item_index' in study_keys:
            study['item_index'][i] = study['item_index'][i].astype(int)
        if 'input' in recall_keys:
            recall['input'][i] = recall['input'][i].astype(int) - 1
        if 'item_index' in recall_keys:
            recall['item_index'][i] = recall['item_index'][i].astype(int)

    # the simulation format assumes a fixed list length
    n = np.unique([len(items) for items in study['input']])
    if len(n) > 1:
        raise ValueError('List length must not vary.')
    return study, recall
def visualize_fit(model_class, parameters, data, data_query=None,
                  experiment_count=1000, savefig=False):
    """
    Apply organizational analyses to visually compare the behavior of
    the model with these parameters against specified dataset.

    Parameters
    ----------
    model_class : type
        Model class to instantiate with ``parameters``.
    parameters : dict
        Keyword arguments for the model constructor.
    data : pandas.DataFrame
        Free recall data in Psifr format to compare against.
    data_query : str, optional
        Pandas query string used to filter the data-based analyses.
        If None, the data are used unfiltered.
    experiment_count : int, optional
        Number of simulated free recall lists to generate.
    savefig : bool, optional
        If true, save figures under ``figures/`` (directory assumed to
        exist) instead of displaying them.
    """
    # generate simulation data from model
    model = model_class(**parameters)
    try:
        # some models expect an extra (item_count + 1) column
        model.experience(np.eye(model.item_count, model.item_count + 1, 1))
    except ValueError:
        model.experience(np.eye(model.item_count, model.item_count))
    sim = []
    for experiment in range(experiment_count):
        sim += [
            [experiment, 0, 'study', i + 1, i]
            for i in range(model.item_count)
        ]
    for experiment in range(experiment_count):
        sim += [
            [experiment, 0, 'recall', i + 1, o]
            for i, o in enumerate(model.free_recall())
        ]
    sim = pd.DataFrame(
        sim, columns=['subject', 'list', 'trial_type', 'position', 'item']
    )
    sim_data = fr.merge_free_recall(sim)

    # generate simulation-based spc, pnr, lag_crp
    sim_spc = fr.spc(sim_data).reset_index()
    sim_pfr = fr.pnr(sim_data).query('output <= 1').reset_index()
    sim_lag_crp = fr.lag_crp(sim_data).reset_index()

    # generate data-based spc, pnr, lag_crp; only filter when a query
    # was supplied (DataFrame.query(None) raises a ValueError)
    if data_query is None:
        data_spc = fr.spc(data).reset_index()
        data_pfr = fr.pnr(data).query('output <= 1').reset_index()
        data_lag_crp = fr.lag_crp(data).reset_index()
    else:
        data_spc = fr.spc(data).query(data_query).reset_index()
        data_pfr = fr.pnr(data).query('output <= 1').query(
            data_query).reset_index()
        data_lag_crp = fr.lag_crp(data).query(data_query).reset_index()

    # combine model and data results, labeled by source
    data_spc['Source'] = 'Data'
    sim_spc['Source'] = model_class.__name__
    combined_spc = pd.concat([data_spc, sim_spc], axis=0)
    data_pfr['Source'] = 'Data'
    sim_pfr['Source'] = model_class.__name__
    combined_pfr = pd.concat([data_pfr, sim_pfr], axis=0)
    data_lag_crp['Source'] = 'Data'
    sim_lag_crp['Source'] = model_class.__name__
    combined_lag_crp = pd.concat([data_lag_crp, sim_lag_crp], axis=0)

    # serial position curve
    g = sns.FacetGrid(dropna=False, data=combined_spc)
    g.map_dataframe(sns.lineplot, x='input', y='recall', hue='Source')
    g.set_xlabels('Serial position')
    g.set_ylabels('Recall probability')
    _finish_fit_plot(
        g, 'P(Recall) by Serial Position Curve',
        'figures/{}_fit_spc.jpeg'.format(model_class.__name__), savefig
    )

    # probability of first recall
    h = sns.FacetGrid(dropna=False, data=combined_pfr)
    h.map_dataframe(sns.lineplot, x='input', y='prob', hue='Source')
    h.set_xlabels('Serial position')
    h.set_ylabels('Probability of First Recall')
    _finish_fit_plot(
        h, 'P(First Recall) by Serial Position',
        'figures/{}_fit_pfr.jpeg'.format(model_class.__name__), savefig
    )

    # lag-CRP; plot negative and positive lags separately so there is a
    # break at lag 0
    max_lag = 5
    filt_neg = f'{-max_lag} <= lag < 0'
    filt_pos = f'0 < lag <= {max_lag}'
    i = sns.FacetGrid(dropna=False, data=combined_lag_crp)
    i.map_dataframe(lambda data, **kws: sns.lineplot(
        data=data.query(filt_neg), x='lag', y='prob', hue='Source', **kws))
    i.map_dataframe(lambda data, **kws: sns.lineplot(
        data=data.query(filt_pos), x='lag', y='prob', hue='Source', **kws))
    i.set_xlabels('Lag')
    i.set_ylabels('Recall Probability')
    _finish_fit_plot(
        i, 'Recall Probability by Item Lag',
        'figures/{}_fit_crp.jpeg'.format(model_class.__name__), savefig
    )


def _finish_fit_plot(grid, title, fig_path, savefig):
    """Add title and legend, fix the y-axis to [0, 1], then save or show."""
    plt.title(title)
    grid.add_legend()
    grid.set(ylim=(0, 1))
    if savefig:
        plt.savefig(fig_path, bbox_inches='tight')
    else:
        plt.show()
def data(raw):
    """Merged free recall data, keeping task and item index columns."""
    return fr.merge_free_recall(
        raw, study_keys=['task'], list_keys=['item_index']
    )
def data(raw):
    """Create merged free recall data."""
    return fr.merge_free_recall(
        raw, study_keys=['task', 'block'], list_keys=['item_index']
    )
def time_merge(self):
    """Benchmark merging of study and recall events."""
    # result discarded; only the merge time matters here
    fr.merge_free_recall(
        self.raw,
        list_keys=['list_type', 'list_category'],
        study_keys=['category'],
    )