Example #1
def test_distance_rank_window():
    """Test distance rank window analysis."""
    distances = np.array([
        [0, 3, 1, 1, 2, 2, 2, 2],
        [3, 0, 1, 1, 2, 2, 2, 2],
        [1, 1, 0, 4, 2, 2, 2, 2],
        [1, 1, 4, 0, 2, 2, 2, 2],
        [2, 2, 2, 2, 0, 5, 1, 1],
        [2, 2, 2, 2, 5, 0, 1, 1],
        [2, 2, 2, 2, 1, 1, 0, 6],
        [2, 2, 2, 2, 1, 1, 6, 0],
    ])
    subjects = [1]
    study = [[
        'absence', 'hollow', 'pupil', 'fountain', 'piano', 'pillow', 'cat',
        'tree'
    ]]
    recall = [[
        'fountain', 'hollow', 'absence', 'cat', 'piano', 'pupil', 'fountain'
    ]]
    item_index = ([[0, 1, 2, 3, 4, 5, 6, 7]], [[3, 1, 0, 6, 4, 2, 3]])
    raw = fr.table_from_lists(subjects, study, recall, item_index=item_index)
    data = fr.merge_free_recall(raw, list_keys=['item_index'])
    stat = fr.distance_rank_window(data, 'item_index', distances, [-1, 0, 1])
    expected = np.array([[0.875, 0.875, 0.375], [0, 1, 1], [0, 0, 0]])
    np.testing.assert_allclose(np.mean(expected, 0), stat['rank'].to_numpy())
Example #2
def test_category_clustering():
    """Test category clustering statistics."""
    subject = [1] * 2

    # category of study and recall items (two cases from category
    # clustering tests)
    study_category = [list('abcd') * 4] * 2
    recall_str = ['aaabbbcccddd', 'aabbcdcd']
    recall_category = [list(s) for s in recall_str]

    # unique item codes (needed for merging study and recall events;
    # not actually needed for the stats)
    study_item = [[i for i in range(len(c))] for c in study_category]
    recall_item = [[0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11],
                   [0, 4, 1, 5, 2, 3, 6, 7]]

    # create merged free recall data
    raw = fr.table_from_lists(subject,
                              study_item,
                              recall_item,
                              category=(study_category, recall_category))
    data = fr.merge_free_recall(raw, list_keys=['category'])

    # test ARC and LBC stats
    stats = fr.category_clustering(data, 'category')
    np.testing.assert_allclose(stats.loc[1, 'arc'], 0.667, rtol=0.011)
    np.testing.assert_allclose(stats.loc[1, 'lbc'], 3.2, rtol=0.011)
Example #3
def test_distance_rank_shifted():
    """Test shifted distance rank analysis."""
    distances = np.array([
        [0, 1, 1, 1, 2, 2, 2, 2],
        [1, 0, 1, 4, 2, 2, 2, 2],
        [1, 1, 0, 1, 2, 2, 2, 2],
        [1, 4, 1, 0, 2, 2, 2, 2],
        [2, 2, 2, 2, 0, 3, 3, 3],
        [2, 2, 2, 2, 3, 0, 3, 3],
        [2, 2, 2, 2, 3, 3, 0, 3],
        [2, 2, 2, 2, 3, 3, 3, 0],
    ])
    subjects = [1]
    study = [[
        'absence', 'hollow', 'pupil', 'fountain', 'piano', 'pillow', 'cat',
        'tree'
    ]]
    recall = [[
        'piano', 'fountain', 'hollow', 'tree', 'fountain', 'absence', 'cat',
        'pupil'
    ]]
    item_index = ([[0, 1, 2, 3, 4, 5, 6, 7]], [[4, 3, 1, 7, 3, 0, 6, 2]])
    raw = fr.table_from_lists(subjects, study, recall, item_index=item_index)
    data = fr.merge_free_recall(raw, list_keys=['item_index'])
    stat = fr.distance_rank_shifted(data, 'item_index', distances, 2)

    expected = np.array([0.683333, 0.416667])
    np.testing.assert_allclose(expected,
                               stat['rank'].to_numpy(),
                               atol=0.000001)
Example #4
def test_lag_crp_compound():
    """Test compound lag-CRP analysis."""
    subjects = [1, 1]
    study = [['absence', 'hollow', 'pupil', 'fountain'],
             ['tree', 'cat', 'house', 'dog']]
    recall = [['fountain', 'hollow', 'absence'],
              ['mouse', 'cat', 'tree', 'house', 'dog']]
    raw = fr.table_from_lists(subjects, study, recall)
    data = fr.merge_free_recall(raw)
    crp = fr.lag_crp_compound(data)
    # list 1 recall lags: -2, -1
    # list 2 recall lags: NaN (intrusion), -1, +2, +1
    actual = np.hstack([
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 1, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
    ])
    possible = np.hstack([
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 1, 1],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
    ])
    np.testing.assert_array_equal(actual, crp['actual'].to_numpy())
    np.testing.assert_array_equal(possible, crp['possible'].to_numpy())
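The flattened 'actual' and 'possible' vectors in this test are easiest to read as a 7 x 7 grid of (previous lag, current lag) counts over lags -3 to +3. A minimal sketch of reshaping the output that way, assuming the row ordering implied by the seven stacked 7-element blocks above:

# Sketch: view the compound lag-CRP counts as a (previous lag, current lag) grid.
# Assumes rows are ordered by previous lag (outer) and current lag (inner),
# both running from -3 to +3, matching the blocks stacked in the test.
lags = np.arange(-3, 4)
actual_grid = pd.DataFrame(
    crp['actual'].to_numpy().reshape(7, 7), index=lags, columns=lags
)
# actual_grid.loc[-2, -1] == 1: the hollow -> absence (-1) transition in list 1
# followed the fountain -> hollow (-2) transition.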
Example #5
def setup(self):
    data_file = resource_filename('psifr', 'data/Morton2013.csv')
    self.raw = pd.read_csv(data_file, dtype={'category': 'category'})
    # cat.as_ordered(inplace=True) is removed in recent pandas; assign instead
    self.raw['category'] = self.raw['category'].cat.as_ordered()
    self.data = fr.merge_free_recall(
        self.raw,
        list_keys=['list_type', 'list_category'],
        study_keys=['category'])
Example #6
def split_data(data):
    """Data split into study and recall."""
    merged = fr.merge_free_recall(data, study_keys=['distract'])
    split = {
        'study': fr.split_lists(merged, 'study', ['input', 'distract']),
        'recall': fr.split_lists(merged, 'recall', ['input']),
    }
    return split
Example #7
def test_pli_list_lag():
    """Test proportion of list lags for prior-list intrusions."""
    subjects = [1, 1, 1, 1, 2, 2, 2, 2]
    study = [
        ['tree', 'cat'],
        ['absence', 'hollow'],
        ['fountain', 'piano'],
        ['pillow', 'pupil'],
        ['tree', 'cat'],
        ['absence', 'hollow'],
        ['fountain', 'piano'],
        ['pillow', 'pupil'],
    ]
    recall = [
        ['tree', 'cat'],
        ['hollow', 'absence'],
        ['fountain', 'hollow'],
        ['absence', 'piano', 'cat'],
        ['tree', 'cat'],
        ['absence', 'hollow'],
        ['fountain', 'piano'],
        ['pillow', 'pupil'],
    ]
    raw = fr.table_from_lists(subjects, study, recall)
    data = fr.merge_free_recall(raw)

    # max lag 1 (exclude just the first list)
    stat = fr.pli_list_lag(data, max_lag=1)
    np.testing.assert_array_equal(stat['count'].to_numpy(), np.array([2, 0]))
    np.testing.assert_array_equal(stat['per_list'].to_numpy(),
                                  np.array([2 / 3, 0]))
    np.testing.assert_array_equal(stat['prob'].to_numpy(),
                                  np.array([0.5, np.nan]))

    # max lag 2 (exclude first two lists)
    stat = fr.pli_list_lag(data, max_lag=2)
    np.testing.assert_array_equal(stat['count'].to_numpy(),
                                  np.array([2, 1, 0, 0]))
    np.testing.assert_array_equal(stat['per_list'].to_numpy(),
                                  np.array([1, 0.5, 0, 0]))
    np.testing.assert_array_equal(stat['prob'].to_numpy(),
                                  np.array([0.5, 0.25, np.nan, np.nan]))

    # max lag 3 (exclude first three lists)
    stat = fr.pli_list_lag(data, max_lag=3)
    np.testing.assert_array_equal(stat['count'].to_numpy(),
                                  np.array([1, 1, 1, 0, 0, 0]))
    np.testing.assert_array_equal(stat['per_list'].to_numpy(),
                                  np.array([1, 1, 1, 0, 0, 0]))
    np.testing.assert_array_equal(
        stat['prob'].to_numpy(),
        np.array([1 / 3, 1 / 3, 1 / 3, np.nan, np.nan, np.nan]))
Example #8
def test_pli(raw):
    """Test labeling of prior-list intrusions."""
    data = raw.copy()
    data.loc[3:5, 'item'] = ['hollow', 'pupil', 'fountain']
    data.loc[9:11, 'item'] = ['pillow', 'fountain', 'pupil']
    merged = fr.merge_free_recall(data)
    assert 'prior_list' in merged.columns
    assert 'prior_input' in merged.columns

    # check the PLI (prior-list intrusion) in the second list
    pli = merged.query('item == "pupil" and output == 3')
    pli = pli.reset_index().loc[0]
    assert pli['prior_list'] == 1
    assert pli['prior_input'] == 3

    # check the FLI (future-list intrusion) in the first list
    fli = merged.query('item == "fountain" and output == 3')
    assert np.isnan(fli['prior_list'].to_numpy()[0])
    assert np.isnan(fli['prior_input'].to_numpy()[0])
Example #9
def data():
    raw = pd.DataFrame({
        'subject': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        'list': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2],
        'trial_type': [
            'study', 'study', 'study', 'recall', 'recall', 'recall',
            'study', 'study', 'study', 'recall', 'recall', 'recall',
        ],
        'position': [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3],
        'item': [
            'absence', 'hollow', 'pupil', 'pupil', 'absence', 'empty',
            'fountain', 'piano', 'pillow', 'pillow', 'fountain', 'pillow',
        ],
        'item_index': [0, 1, 2, 1, 2, np.nan, 3, 4, 5, 5, 3, 5],
        'task': [1, 2, 1, 2, 1, np.nan, 1, 2, 1, 1, 1, 1],
    })
    data = fr.merge_free_recall(raw,
                                study_keys=['task'],
                                list_keys=['item_index'])
    return data
Example #10
File: fit.py  Project: mortonne/cymr
def prepare_lists(data, study_keys=None, recall_keys=None, clean=True):
    """
    Prepare study and recall data for simulation.

    Return data information split by list. This format is similar to
    frdata structs used in EMBAM.

    Parameters
    ----------
    data : pandas.DataFrame
        Free recall data in Psifr format.

    study_keys : list of str, optional
        Columns to export for study list data. Default is:
        ['input', 'item_index']. Input position is assumed to be
        one-indexed.

    recall_keys : list of str, optional
        Columns to export for recall list data. Default is:
        ['input', 'item_index']. Input position is assumed to be
        one-indexed.

    clean : bool, optional
        If true, repeats and intrusions will be removed.

    Returns
    -------
    study : dict of (str: list of numpy.array)
        Study columns in list format.

    recall : dict of (str: list of numpy.array)
        Recall columns in list format.
    """
    if study_keys is None:
        study_keys = ['input', 'item_index']

    if recall_keys is None:
        recall_keys = ['input', 'item_index']

    s_keys = study_keys.copy()
    s_keys.remove('input')
    r_keys = recall_keys.copy()
    r_keys.remove('input')
    if 'item_index' in r_keys:
        r_keys.remove('item_index')
    merged = fr.merge_free_recall(data, study_keys=s_keys, recall_keys=r_keys)
    if clean:
        merged = merged.query('~intrusion and repeat == 0')

    study = fr.split_lists(merged, 'study', study_keys)
    recall = fr.split_lists(merged, 'recall', recall_keys)

    for i in range(len(study['input'])):
        if 'input' in study_keys:
            study['input'][i] = study['input'][i].astype(int) - 1
        if 'item_index' in study_keys:
            study['item_index'][i] = study['item_index'][i].astype(int)

        if 'input' in recall_keys:
            recall['input'][i] = recall['input'][i].astype(int) - 1
        if 'item_index' in recall_keys:
            recall['item_index'][i] = recall['item_index'][i].astype(int)

    n = np.unique([len(items) for items in study['input']])
    if len(n) > 1:
        raise ValueError('List length must not vary.')
    return study, recall
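A minimal usage sketch, assuming prepare_lists is available alongside the Psifr helpers used in the examples above; the word list and item indices here are made up for illustration:

# Usage sketch (hypothetical inputs): build a Psifr-format table, then split it
# into zero-indexed study and recall arrays for simulation.
subjects = [1]
study = [['absence', 'hollow', 'pupil', 'fountain']]
recall = [['fountain', 'hollow', 'absence']]
item_index = ([[0, 1, 2, 3]], [[3, 1, 0]])
raw = fr.table_from_lists(subjects, study, recall, item_index=item_index)
study_lists, recall_lists = prepare_lists(raw)
# study_lists['input'][0] -> array([0, 1, 2, 3]) (zero-indexed study positions)
# recall_lists['input'][0] -> array([3, 1, 0]) (zero-indexed recall sequence)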
Example #11
def visualize_fit(model_class,
                  parameters,
                  data,
                  data_query=None,
                  experiment_count=1000,
                  savefig=False):
    """
    Apply organizational analyses to visually compare the behavior of the model with these parameters against
    specified dataset.
    """

    # generate simulation data from model
    model = model_class(**parameters)
    try:
        model.experience(np.eye(model.item_count, model.item_count + 1, 1))
    except ValueError:
        model.experience(np.eye(model.item_count, model.item_count))
    sim = []
    for experiment in range(experiment_count):
        sim += [[experiment, 0, 'study', i + 1, i]
                for i in range(model.item_count)]
    for experiment in range(experiment_count):
        sim += [[experiment, 0, 'recall', i + 1, o]
                for i, o in enumerate(model.free_recall())]
    sim = pd.DataFrame(
        sim, columns=['subject', 'list', 'trial_type', 'position', 'item'])
    sim_data = fr.merge_free_recall(sim)

    # generate simulation-based spc, pnr, lag_crp
    sim_spc = fr.spc(sim_data).reset_index()
    sim_pfr = fr.pnr(sim_data).query('output <= 1').reset_index()
    sim_lag_crp = fr.lag_crp(sim_data).reset_index()

    # generate data-based spc, pnr, lag_crp
    data_spc = fr.spc(data).query(data_query).reset_index()
    data_pfr = fr.pnr(data).query('output <= 1').query(
        data_query).reset_index()
    data_lag_crp = fr.lag_crp(data).query(data_query).reset_index()

    # combine representations
    data_spc['Source'] = 'Data'
    sim_spc['Source'] = model_class.__name__
    combined_spc = pd.concat([data_spc, sim_spc], axis=0)

    data_pfr['Source'] = 'Data'
    sim_pfr['Source'] = model_class.__name__
    combined_pfr = pd.concat([data_pfr, sim_pfr], axis=0)

    data_lag_crp['Source'] = 'Data'
    sim_lag_crp['Source'] = model_class.__name__
    combined_lag_crp = pd.concat([data_lag_crp, sim_lag_crp], axis=0)

    # generate plots of result
    # spc
    g = sns.FacetGrid(dropna=False, data=combined_spc)
    g.map_dataframe(sns.lineplot, x='input', y='recall', hue='Source')
    g.set_xlabels('Serial position')
    g.set_ylabels('Recall probability')
    plt.title('P(Recall) by Serial Position Curve')
    g.add_legend()
    g.set(ylim=(0, 1))
    if savefig:
        plt.savefig('figures/{}_fit_spc.jpeg'.format(model_class.__name__),
                    bbox_inches='tight')
    else:
        plt.show()

    # pfr (probability of first recall)
    h = sns.FacetGrid(dropna=False, data=combined_pfr)
    h.map_dataframe(sns.lineplot, x='input', y='prob', hue='Source')
    h.set_xlabels('Serial position')
    h.set_ylabels('Probability of First Recall')
    plt.title('P(First Recall) by Serial Position')
    h.add_legend()
    h.set(ylim=(0, 1))
    if savefig:
        plt.savefig('figures/{}_fit_pfr.jpeg'.format(model_class.__name__),
                    bbox_inches='tight')
    else:
        plt.show()

    # lag crp
    max_lag = 5
    filt_neg = f'{-max_lag} <= lag < 0'
    filt_pos = f'0 < lag <= {max_lag}'
    i = sns.FacetGrid(dropna=False, data=combined_lag_crp)
    i.map_dataframe(lambda data, **kws: sns.lineplot(
        data=data.query(filt_neg), x='lag', y='prob', hue='Source', **kws))
    i.map_dataframe(lambda data, **kws: sns.lineplot(
        data=data.query(filt_pos), x='lag', y='prob', hue='Source', **kws))
    i.set_xlabels('Lag')
    i.set_ylabels('Recall Probability')
    plt.title('Recall Probability by Item Lag')
    i.add_legend()
    i.set(ylim=(0, 1))
    if savefig:
        plt.savefig('figures/{}_fit_crp.jpeg'.format(model_class.__name__),
                    bbox_inches='tight')
    else:
        plt.show()
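A hedged usage sketch; MyCMRModel, the parameter values, and the raw table are hypothetical placeholders rather than part of the function above:

# Hypothetical call: MyCMRModel and the parameter dict stand in for whatever
# model class and fitted parameters are being compared against the data.
merged = fr.merge_free_recall(raw)        # Psifr merged free recall data
parameters = {'item_count': 16}           # hypothetical fitted parameters
visualize_fit(
    MyCMRModel,
    parameters,
    merged,
    data_query='subject == 1',            # applied to each analysis result
    experiment_count=100,
    savefig=True,
)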
Example #12
def data(raw):
    data = fr.merge_free_recall(
        raw, study_keys=['task'], list_keys=['item_index']
    )
    return data
Example #13
def data(raw):
    """Create merged free recall data."""
    data = fr.merge_free_recall(raw,
                                study_keys=['task', 'block'],
                                list_keys=['item_index'])
    return data
Example #14
def time_merge(self):
    data = fr.merge_free_recall(self.raw,
                                list_keys=['list_type', 'list_category'],
                                study_keys=['category'])