Example #1
def p_reject(rejects_matrix = None, subject = None, rec_mask = None, recalls = None):
    """
    P_REJECT Computes probability of rejecting recalled items.

    p_rejects = p_reject(rejects_matrix, subject, rec_mask, recalls)

    INPUTS:
        rejects_matrix: a matrix whose elements indicate whether recalled
                        items were rejected or accepted as correct items
                        in externalized free recall (EFR).
                        The rows of this matrix should
                        represent recalls made by a single subject on a single
                        trial. An element of the rejects matrix should be
                        equal to 1 if and only if that item was rejected.

        subject:        a column vector which indexes the rows of rejects_matrix
                        with a subject number (or other identifier). That is,
                        the recall trials of subject S should be located in
                        rejects_matrix(find(subject==S), :)

        rec_mask:       if given, a logical matrix of the same shape as
                        rejects_matrix, which is false at positions (i, j) where
                        the value at rejects_matrix(i, j) should be excluded from
                        the calculation of the rejection probability. If NOT
                        given, a standard clean recalls mask is built from
                        "recalls", which excludes repeats, intrusions and
                        empty cells.

        recalls:        a matrix of recalled serial positions, used only to
                        build the default clean recalls mask when rec_mask is
                        not given.


    OUTPUTS:
        p_reject:       a vector of probabilities. Rows are indexed by subject.
    """
    if rejects_matrix is None:
        raise Exception('You must pass a rejects matrix.')
    if subject is None:
        raise Exception('You must pass a subject vector.')
    if len(rejects_matrix) != len(subject):
        raise Exception('rejects matrix needs to be same length as subjects.')
    if rec_mask is None:
        rec_mask = mask.make_clean_recalls_mask2d(recalls)
    subjects = np.unique(subject)
    result = []
    for subj in subjects:
        denom = 0
        num = 0
        # Count masked-in recalls (denominator) and rejected recalls (numerator)
        # across all of this subject's trials.
        for subj_ind, subj_num in enumerate(subject):
            if subj == subj_num:
                for index, item in enumerate(rec_mask[subj_ind]):
                    if item == 1:
                        denom += 1
                        if rejects_matrix[subj_ind][index] == 1:
                            num += 1
        if denom != 0:
            result.append(num / float(denom))
        else:
            result.append(0)
    return result
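
A minimal usage sketch for p_reject with toy data invented for illustration; passing an explicit rec_mask avoids needing any masking helpers, so only numpy and the function above are assumed to be in scope.

import numpy as np

# Toy EFR data: one subject, two trials, three recall slots per trial.
# rejects[i][j] == 1 means the j-th recall on trial i was rejected.
rejects = [[0, 1, 0],
           [1, 0, 0]]
subject = ['S01', 'S01']
# 1 = include this recall in the calculation, 0 = exclude it.
rec_mask = [[1, 1, 1],
            [1, 1, 0]]

print(p_reject(rejects_matrix=rejects, subject=subject, rec_mask=rec_mask))
# Expected output: [0.4] -- 2 rejections out of 5 masked-in recalls.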
Example #2
File: sem_crp.py  Project: pennmem/pybeh
def sem_crp(recalls=None,
            recalls_itemnos=None,
            pres_itemnos=None,
            subjects=None,
            sem_sims=None,
            n_bins=10,
            listLength=None):
    """sanity check"""
    if recalls_itemnos is None:
        raise Exception('You must pass a recalls-by-item-numbers matrix.')
    elif pres_itemnos is None:
        raise Exception(
            'You must pass a presentations-by-item-numbers matrix.')
    elif sem_sims is None:
        raise Exception('You must pass a semantic similarity matrix.')
    elif subjects is None:
        raise Exception('You must pass a subjects vector.')
    elif listLength is None:
        raise Exception('You must pass a listLength')
    elif len(recalls_itemnos) != len(subjects):
        raise Exception(
            'recalls matrix must have the same number of rows as subjects.')

    # Make sure that all input arrays and matrices are numpy arrays
    recalls = np.array(recalls, dtype=int)
    recalls_itemnos = np.array(recalls_itemnos, dtype=int)
    pres_itemnos = np.array(pres_itemnos, dtype=int)
    subjects = np.array(subjects)
    sem_sims = np.array(sem_sims)

    # Set diagonal of the similarity matrix to nan
    np.fill_diagonal(sem_sims, np.nan)
    # Sort and split all similarities into equally sized bins
    all_sim = sem_sims.flatten()
    all_sim = np.sort(all_sim[~np.isnan(all_sim)])
    bins = np.array_split(all_sim, n_bins)
    bins = [b[0] for b in bins]
    # Convert the similarity matrix to bin numbers for easy bin lookup later
    bin_sims = np.digitize(sem_sims, bins) - 1

    # Convert recalled item numbers to the corresponding indices of the similarity matrix by subtracting 1
    recalls_itemnos -= 1
    pres_itemnos -= 1

    usub = np.unique(subjects)
    bin_means = np.zeros((len(usub), n_bins))
    crp = np.zeros((len(usub), n_bins))
    # For each subject
    for i, subj in enumerate(usub):
        # Create a filter to select only the current subject's data
        subj_mask = subjects == subj
        subj_recalls = recalls[subj_mask]
        subj_rec_itemnos = recalls_itemnos[subj_mask]
        subj_pres_itemnos = pres_itemnos[subj_mask]

        # Create trials x items matrix where item j, k indicates whether the kth recall on trial j was a correct recall
        clean_recalls_mask = np.array(make_clean_recalls_mask2d(subj_recalls))

        # Setup counts for number of possible and actual transitions, as well as the sim value of actual transitions
        actual = np.zeros(n_bins)
        poss = np.zeros(n_bins)
        val = np.zeros(n_bins)

        # For each of the current subject's trials
        for j, trial_recs in enumerate(subj_recalls):
            seen = set()
            # For each recall on the current trial
            for k, rec in enumerate(trial_recs[:-1]):
                seen.add(rec)
                # Only increment transition counts if the current and next recall are BOTH correct recalls
                if clean_recalls_mask[j, k] and clean_recalls_mask[j, k + 1]:
                    this_recno = subj_rec_itemnos[j, k]
                    next_recno = subj_rec_itemnos[j, k + 1]
                    # Lookup semantic similarity and its bin between current recall and next recall
                    sim = sem_sims[this_recno, next_recno]
                    b = bin_sims[this_recno, next_recno]
                    actual[b] += 1
                    val[b] += sim

                    # Get a list of not-yet-recalled word numbers
                    poss_rec = [
                        subj_pres_itemnos[j][x] for x in range(listLength)
                        if x + 1 not in seen
                    ]
                    # Lookup the similarity bins between the current recall and all possible correct recalls
                    poss_trans = np.unique(
                        [bin_sims[this_recno, itemno] for itemno in poss_rec])
                    for b in poss_trans:
                        poss[b] += 1

        # CRP = number of actual transitions / number of possible transitions per bin
        crp[i, :] = actual / poss
        # Bin means = average similarity of the actual transitions in each bin
        bin_means[i, :] = val / actual

    return bin_means, crp
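
To make the binning step above concrete, here is a small sketch (toy similarity values and 3 bins instead of 10, all invented for illustration) of how the similarity matrix is converted to bin numbers; it only needs numpy.

import numpy as np

# Toy 4-item similarity matrix (values invented for illustration).
sem_sims = np.array([[1.0, 0.2, 0.6, 0.1],
                     [0.2, 1.0, 0.4, 0.3],
                     [0.6, 0.4, 1.0, 0.5],
                     [0.1, 0.3, 0.5, 1.0]])
np.fill_diagonal(sem_sims, np.nan)                 # ignore self-similarities
all_sim = np.sort(sem_sims[~np.isnan(sem_sims)])   # sorted off-diagonal values
bins = [b[0] for b in np.array_split(all_sim, 3)]  # lower edge of each bin
bin_sims = np.digitize(sem_sims, bins) - 1
# bin_sims[i, j] is now the similarity bin of a transition from item i to
# item j, which is what the subject loop uses to tally actual and possible
# transitions per bin.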
Example #3
File: crl.py  Project: pennmem/pybeh
def crl(recalls=None,
        times=None,
        subjects=None,
        listLength=None,
        lag_num=None,
        skip_first_n=0):
    """
    CRL  Inter-response time as a function of lag.

    Calculates the mean time it takes to move from one word position to another
    as a function of lag.  Returns lag-conditional response latency times.

    Please note: if i and j are consecutive recalls, only real transitions are
    counted, meaning
    1) neither i nor j is an intrusion (not == -1)
    2) words that have already been recalled can be neither transitioned to
       nor transitioned from

    FUNCTION:
        crl = crl(recalls, times, subjects, listLength, lag_num, skip_first_n)

    INPUT ARGS:
        recalls    - recall positions
        times      - time associated with each recall
        subjects   - subject number associated with each trial
        listLength - number of words in the list
        lag_num    - lag number to output
        skip_first_n - an integer indicating the number of recall transitions to
                       ignore from the start of the recall period, for the
                       purposes of calculating the CRL. This can be useful to avoid
                       biasing your results, as the first 2-3 transitions are
                       almost always temporally clustered with short IRTs.
                       (DEFAULT=0)


    OUTPUT ARGS:
        crl - a matrix of average crl times by lag position for each subject
    """
    if recalls is None:
        raise Exception('You must pass a recalls matrix.')
    if times is None:
        raise Exception('You must pass a times vector.')
    if subjects is None:
        raise Exception('You must pass a subject vector.')
    if listLength is None:
        raise Exception('You must pass a list length.')
    if lag_num is None:
        lag_num = listLength - 1
    elif lag_num < 1 or lag_num >= listLength or not isinstance(lag_num, int):
        raise ValueError(
            'Lag number needs to be a positive integer that is less than the list length.'
        )
    if not isinstance(skip_first_n, int):
        raise ValueError('skip_first_n must be an integer.')

    # Convert inputs to numpy arrays
    recalls = np.array(recalls)
    times = np.array(times)
    subjects = np.array(subjects)
    # Get a list of unique subjects -- we will calculate a CRP for each
    usub = np.unique(subjects)
    # Number of possible lags = (listLength - 1) * 2 + 1; e.g. a length-24 list can have lags -23 through +23
    num_lags = 2 * listLength - 1
    # Initialize array to store the CRP for each subject (or other unique identifier)
    result = np.zeros((usub.size, num_lags))
    # Initialize arrays to store transition counts
    trans_count = np.empty(num_lags)
    time_count = np.empty(num_lags)

    # For each subject/unique identifier
    for i, subj in enumerate(usub):
        # Reset counts for each participant
        trans_count.fill(0)
        time_count.fill(0)
        cur_recs = recalls[subjects == subj]
        cur_times = times[subjects == subj]
        # Create trials x items matrix where item j, k indicates whether the kth recall on trial j was a correct recall
        clean_recalls_mask = np.array(make_clean_recalls_mask2d(cur_recs))
        # For each trial that matches that identifier
        for j, trial_recs in enumerate(cur_recs):
            for k, rec in enumerate(trial_recs[:-1]):
                # Only increment transition and timing counts if the current and next recall are BOTH correct recalls
                if clean_recalls_mask[j, k] and clean_recalls_mask[
                        j, k + 1] and k >= skip_first_n:
                    next_rec = trial_recs[k + 1]
                    trans = next_rec - rec
                    trans_time = cur_times[j, k + 1] - cur_times[j, k]
                    # Record the transition that was made and its IRT
                    trans_count[trans + listLength - 1] += 1
                    time_count[trans + listLength - 1] += trans_time

        result[i, :] = time_count / trans_count
        result[i, trans_count == 0] = np.nan

    return result[:, listLength - lag_num - 1:listLength + lag_num]
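
A hedged usage sketch for crl with toy recall and timing data (invented for illustration); it assumes numpy and pybeh's make_clean_recalls_mask2d are importable, as in the file above.

import numpy as np

recalls = [[1, 2, 4, 0],                 # serial positions recalled per trial
           [3, 4, 1, 2]]
times = [[1000, 2500, 6000, 0],          # onset time (ms) of each recall
         [1200, 2000, 7500, 9000]]
subjects = ['S01', 'S01']

irt_by_lag = crl(recalls=recalls, times=times, subjects=subjects,
                 listLength=4, lag_num=3)
# irt_by_lag has one row per subject and one column per lag from -3 to +3;
# lags with no observed transitions are NaN.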
Example #4
File: crp.py  Project: pennmem/pybeh
def crp(recalls=None, subjects=None, listLength=None, lag_num=None, skip_first_n=0):
    '''
    CRP   Conditional response probability as a function of lag (lag-CRP).
    
      lag_crps = crp(recalls_matrix, subjects, list_length, lag_num)
    
      INPUTS:
             recalls:  A 2D iterable whose elements are serial positions of
                       recalled items.  The rows of this array should
                       represent recalls made by a single subject on a
                       single trial.
    
            subjects:  A column vector which indexes the rows of "recalls"
                       with a subject number (or other identifier).  The
                       subject identifiers should be repeated for each
                       row of "recalls" originating from the same subject.
    
         list_length:  A scalar indicating the number of serial positions in
                       the presented lists.  Serial positions are assumed to
                       run from 1:list_length.
    
             lag_num:  A scalar indicating the max number of lags to track.
    
        skip_first_n:  An integer indicating the number of recall
                       transitions to ignore from the start of the recall
                       period, for the purposes of calculating the CRP.
                       This can be useful to avoid biasing your results, as
                       the first 2-3 transitions are almost always
                       temporally clustered.  Note that the first n recalls
                       will still count as already recalled words for the
                       purposes of determining which transitions are
                       possible.  (DEFAULT=0)
    
    
      OUTPUTS:
            lag_crps:  A matrix of lag-CRP values.  Each row contains the
                       values for one subject.  It has as many columns as
                       there are possible transitions (i.e., the length of
                       (-list_length + 1) : (list_length - 1) ).  The center
                       column, corresponding to the "transition of length 0,"
                       is guaranteed to be filled with NaNs.  Any lag_crps
                       element which had no possible transitions for the
                       input data for that subject will also have a value of
                       NaN.

                       For example, if list_length == 4, a row in lag_crps
                       has 7 columns, corresponding to the transitions from
                       -3 to +3:
                       lag-CRPs:     [ 0.1  0.2  0.3  NaN  0.3  0.1  0.0 ]
                       transitions:    -3   -2    -1   0    +1   +2   +3
    '''
    if recalls is None:
        raise Exception('You must pass a recalls matrix.')
    elif subjects is None:
        raise Exception('You must pass a subjects vector.')
    elif listLength is None:
        raise Exception('You must pass a list length.')
    elif len(recalls) != len(subjects):
        raise Exception('recalls matrix must have the same number of rows as subjects.')
    if lag_num is None:
        lag_num = listLength - 1
    elif lag_num < 1 or lag_num >= listLength or not isinstance(lag_num, int):
        raise ValueError('Lag number needs to be a positive integer that is less than the list length.')
    if not isinstance(skip_first_n, int):
        raise ValueError('skip_first_n must be an integer.')

    # Convert recalls and subjects to numpy arrays
    recalls = np.array(recalls)
    subjects = np.array(subjects)
    # Get a list of unique subjects -- we will calculate a CRP for each
    usub = np.unique(subjects)
    # Number of possible lags = (listLength - 1) * 2 + 1; e.g. a length-24 list can have lags -23 through +23
    num_lags = 2 * listLength - 1
    # Initialize array to store the CRP for each subject (or other unique identifier)
    result = np.zeros((usub.size, num_lags))
    # Initialize arrays to store transition counts
    actual = np.empty(num_lags)
    poss = np.empty(num_lags)

    # For each subject/unique identifier
    for i, subj in enumerate(usub):
        # Reset counts for each participant
        actual.fill(0)
        poss.fill(0)
        # Create trials x items matrix where item j, k indicates whether the kth recall on trial j was a correct recall
        clean_recalls_mask = np.array(make_clean_recalls_mask2d(recalls[subjects == subj]))
        # For each trial that matches that identifier
        for j, trial_recs in enumerate(recalls[subjects == subj]):
            seen = set()
            for k, rec in enumerate(trial_recs[:-1]):
                seen.add(rec)
                # Only increment transition counts if the current and next recall are BOTH correct recalls
                if clean_recalls_mask[j][k] and clean_recalls_mask[j][k + 1] and k >= skip_first_n:
                    next_rec = trial_recs[k + 1]
                    pt = np.array([trans for trans in range(1 - rec, listLength + 1 - rec) if rec + trans not in seen], dtype=int)
                    poss[pt + listLength - 1] += 1
                    trans = next_rec - rec
                    # Record the actual transition that was made
                    actual[trans + listLength - 1] += 1

        result[i, :] = [a/p if p!=0 else np.nan for a,p in zip(actual, poss)]

    result[:, listLength - 1] = np.nan

    return result[:, listLength - lag_num - 1:listLength + lag_num]
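
A hedged usage sketch for crp with toy data for two subjects (invented for illustration); it assumes numpy and make_clean_recalls_mask2d are importable, as in the file above.

import numpy as np

recalls = [[1, 2, 3, 4],      # serial positions recalled on each trial
           [4, 3, 1, 0],
           [2, 3, 4, 1]]
subjects = ['S01', 'S01', 'S02']

lag_crps = crp(recalls=recalls, subjects=subjects, listLength=4)
# lag_crps has shape (2, 7): one row per unique subject and one column per
# lag from -3 to +3, with the lag-0 column always NaN.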
Example #5
File: or_score.py  Project: pennmem/pybeh
def or_score(recalls=None, subjects=None, listLength=None, rec_mask=None):
    """
    OR_SCORE  Recall probability for either of a pair, conditional on their lag.

    Computes the OR scores between pairs of items as a function of
    lag, irrespective of their serial positions.

    or_scores = or_score(recalls_matrix, subjects, list_length, rec_mask)

    INPUTS:
        recalls_matrix: a matrix whose elements are serial positions of recalled
                        items. The rows of this matrix should represent recalls
                        made by a single subject on a single trial.

        subjects:       a column vector which indexes the rows of recalls_matrix
                        with a subject number (or other identifier). That is,
                        the recall trials of subject S should be located in
                        recalls_matrix(find(subjects==S), :)

        list_length:    a scalar indicating the number of serial positions in the
                        presented lists. serial positions are assumed to run
                        from 1:list_length.


        rec_mask:       if given, a logical matrix the same shape as
                        recalls_matrix, true at positions for items to be
                        counted. Note that this mask does NOT need to exclude
                        repeats and intrusions, but it should include repeated
                        items if we're only interested in OR scores of 1p items
                        in mixed lists.

    OUTPUTS:
        or_scores:      a matrix of OR scores, i.e. the probability of
                        recalling one item or the other from a pair. Its
                        columns are indexed by lag and its rows are indexed by
                        subject.

    NOTES:
                        Using the proper rec_mask is CRUCIAL for determining proper OR scores.
                        For the typical serial position curve, items from the recency and primacy
                        portions should be excluded, which is typically determined manually.
    """
    if recalls is None:
        raise Exception('You must pass a recalls matrix.')
    elif listLength is None:
        raise Exception('You must pass a list length.')
    elif subjects is None:
        raise Exception('You must pass a subjects vector.')
    if rec_mask is None:
        rec_mask = mask.make_clean_recalls_mask2d(recalls)
    elif len(rec_mask) != len(recalls):
        raise Exception('rec_mask needs to be same shape as recalls.')
    # Apply the mask so that excluded recalls are not counted below
    recalls = mask.mask_data(recalls, rec_mask)
    result = []
    unique_subjects = np.unique(subjects)
    for subj in unique_subjects:
        orscore_subj = []

        for lag in range(1, listLength):

            track = [0] * (listLength - lag)
            n = 0

            for subj_ind, subj_num in enumerate(subjects):
                if subj == subj_num:
                    n += 1
                    for ind in range(len(track)):
                        if ind + 1 in recalls[
                                subj_ind] or ind + 1 + lag in recalls[subj_ind]:
                            track[ind] += 1
            # Average the pair counts over trials and pairs at this lag
            total = sum(track) / float(n * (listLength - lag))
            orscore_subj.append(total)
        result.append(orscore_subj)
    return result
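
A hedged usage sketch for or_score with toy data (invented for illustration); it assumes numpy and pybeh's mask helpers (make_clean_recalls_mask2d, mask_data) are importable, as in the file above.

import numpy as np

recalls = np.array([[1, 3, 4, 0],   # serial positions recalled per trial
                    [2, 4, 0, 0]])
subjects = ['S01', 'S01']

or_scores = or_score(recalls=recalls, subjects=subjects, listLength=4)
# or_scores has one row per subject; entry [r][lag - 1] is the probability,
# averaged over pairs and trials, that at least one item of a pair separated
# by that lag was recalled.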
Example #6
def dist_fact(rec_itemnos=None,
              pres_itemnos=None,
              subjects=None,
              dist_mat=None,
              is_similarity=False,
              skip_first_n=0):
    """
    Returns a clustering factor score for each subject, based on the provided distance metric (Polyn, Norman, & Kahana,
    2009). Can also be used with a similarity matrix (e.g. LSA, word2vec) if is_similarity is set to True.

    :param rec_itemnos: A trials x recalls matrix containing the ID numbers (between 1 and N) of the items recalled on
        each trial. Extra-list intrusions should appear as -1, and the matrix should be padded with zeros if the number
        of recalls differs by trial.
    :param pres_itemnos: A trials x items matrix containing the ID numbers (between 1 and N) of the items presented on
        each trial.
    :param subjects: A list/array containing identifiers (e.g. subject number) indicating which subject completed each
        trial.
    :param dist_mat: An NxN matrix (where N is the number of words in the wordpool) defining either the distance or
        similarity between every pair of words in the wordpool. Whether dist_mat defines distance or similarity can be
        specified with the is_similarity parameter.
    :param is_similarity: If False, dist_mat is assumed to be a distance matrix. If True, dist_mat is instead treated as
        a similarity matrix (i.e. larger values correspond to smaller distances). (DEFAULT = False)
    :param skip_first_n: An integer indicating the number of recall transitions to ignore from the start of each recall
        period, for the purposes of calculating the clustering factor. This can be useful to avoid biasing your results,
        as early transitions often differ from later transitions in terms of their clustering. Note that the first n
        recalls will still count as already recalled words for the purposes of determining which transitions are
        possible. (DEFAULT = 0)

    :return: An array containing the clustering factor score for each subject (sorted by alphabetical order).
    """

    if rec_itemnos is None:
        raise Exception('You must pass a recall_itemnos matrix.')
    if pres_itemnos is None:
        raise Exception('You must pass a pres_itemnos matrix.')
    if subjects is None:
        raise Exception('You must pass a subjects vector.')
    if dist_mat is None:
        raise Exception(
            'You must pass either a similarity matrix or a distance matrix.')
    if len(rec_itemnos) != len(subjects) or len(pres_itemnos) != len(subjects):
        raise Exception(
            'The rec_itemnos and pres_itemnos matrices must have the same number of rows as the list of '
            'subjects.')
    if not isinstance(skip_first_n, int) or skip_first_n < 0:
        raise ValueError('skip_first_n must be a nonnegative integer.')

    # Convert inputs to numpy arrays if they are not arrays already
    rec_itemnos = np.array(rec_itemnos)
    pres_itemnos = np.array(pres_itemnos)
    subjects = np.array(subjects)
    dist_mat = np.array(dist_mat)

    # Provide a warning if the user inputs a dist_mat that looks like a similarity matrix (scores on diagonal are
    # large), but has left is_similarity as False
    if (not is_similarity) and np.nanmean(
            np.diagonal(dist_mat)) > np.nanmean(dist_mat):
        warnings.warn(
            'It looks like you might be using a similarity matrix (e.g. LSA, word2vec) instead of a distance'
            ' matrix, but you currently have is_similarity set to False. If you are using a similarity'
            ' matrix, make sure to set is_similarity to True when running dist_fact().'
        )

    # Initialize arrays to store each participant's results
    usub = np.unique(subjects)
    total = np.zeros_like(usub, dtype=float)
    count = np.zeros_like(usub, dtype=float)

    # Identify locations of all correct recalls (not PLI, ELI, or repetition)
    clean_recalls_mask = np.array(
        make_clean_recalls_mask2d(
            make_recalls_matrix(pres_itemnos, rec_itemnos)))

    # Calculate distance factor score for each trial
    for i, trial_data in enumerate(rec_itemnos):
        seen = set()
        # Identify the current subject's index in usub to determine their position in the total and count arrays
        subj_ind = np.where(usub == subjects[i])[0][0]
        # Loop over the recalls on the current trial
        for j, rec in enumerate(trial_data[:-1]):
            seen.add(rec)
            # Only count transition if both the current and next recalls are valid
            if clean_recalls_mask[i, j] and clean_recalls_mask[
                    i, j + 1] and j >= skip_first_n:
                # Identify the distance between the current recall and all valid recalls that could follow it
                possibles = np.array([
                    dist_mat[rec - 1, poss_rec - 1]
                    for poss_rec in pres_itemnos[i] if poss_rec not in seen
                ])
                # Identify the distance between the current recall and the next
                actual = dist_mat[rec - 1, trial_data[j + 1] - 1]
                # Find the proportion of possible transitions that were larger than the actual transition
                ptile_rank = dist_percentile_rank(actual, possibles,
                                                  is_similarity)
                # Add transition to the appropriate participant's score
                if ptile_rank is not None:
                    total[subj_ind] += ptile_rank
                    count[subj_ind] += 1

    # Find temporal factor scores as the participants' average transition scores
    count[count == 0] = np.nan
    final_data = total / count

    return final_data
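
A hedged usage sketch for dist_fact with a toy symmetric distance matrix (values invented for illustration); it assumes numpy and the helper functions used above (make_recalls_matrix, make_clean_recalls_mask2d, dist_percentile_rank) are importable from pybeh.

import numpy as np

pres_itemnos = [[1, 2, 3, 4],            # item IDs presented on each trial
                [5, 6, 7, 8]]
rec_itemnos = [[2, 1, 4, 0],             # item IDs recalled (0 = empty slot)
               [7, 8, 5, 0]]
subjects = ['S01', 'S01']

rng = np.random.default_rng(0)
dist_mat = rng.random((8, 8))            # toy 8 x 8 distance matrix
dist_mat = (dist_mat + dist_mat.T) / 2   # make it symmetric
np.fill_diagonal(dist_mat, 0)

scores = dist_fact(rec_itemnos=rec_itemnos, pres_itemnos=pres_itemnos,
                   subjects=subjects, dist_mat=dist_mat)
# scores has one value per unique subject; values near 1 mean actual
# transitions were much closer (by the distance metric) than the available
# alternatives, i.e. strong clustering.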
Example #7
def p_stop_perc(recalls=None,
                subject=None,
                time=None,
                record_time=None,
                exit_time_thresh=None,
                rec_mask=None):
    """
    P_STOP_PERC  Probability of stopping recall.

    p_stops = p_stop_perc(recalls, subject, time, record_time, exit_time_thresh, rec_mask)

    INPUTS:
        recalls:            a matrix whose elements are serial positions of recalled
                            items. The rows of this matrix should represent recalls
                            made by a single subject on a single trial.

        time:               a matrix whose elements are the millisecond times of the
                            recalled items. The rows of this matrix should represent
                            recalls made by a single subject on a single trial.

        record_time:        a scalar giving the length, in milliseconds, of the
                            recall period.

        exit_time_thresh:   a scalar, in ms, representing time required between
                            the final recall in a trial and the end of the recall
                            period. If the final recall occurred less than
                            exit_time_thresh away from the end of the recall period,
                            the trial will not be used in the analysis (the idea
                            being that perhaps the subject simply ran out of time
                            but was not done recalling). A trial will only be
                            included if the final recall is greater than
                            exit_time_thresh away from the end of the recall period
                            AND the time between the final recall and the end of
                            of the recall period is greater than all of the
                            inter-response times on that trial. NOTE: If you do not
                            want to exclude any trials based on these criteria, do not
                            pass values for time, record_time, and exit_time_thresh.

        subject:            a column vector which indexes the rows of recalls
                            with a subject number (or other identifier). That is,
                            the recall trials of subject S should be located in
                            recalls(find(subject==S), :)

        rec_mask:           a logical matrix the same shape as recalls. The mask
                            should be true for any item in the condition of interest.
                            If NOT given, a clean recalls mask is used (i.e., only
                            correct recalls will be analyzed.)

    OUTPUTS:
        p_stops:            a list of stopping probabilities, one value per
                            subject.

    Notes about the mask:   You can use the rec_mask input to analyze different
                            types of recalls (e.g., correct recalls, repetitions,
                            intrusions). There are masking functions that will
                            create these.

    To only look at correct responses:  rec_mask = make_clean_recalls_mask2d(recalls)

    To only look at repetitions:        rec_mask = make_mask_only_reps2d(recalls)

    To look at intrusions, you can create a mask from an intrusions matrix
    (which must be the same size as recalls, where a positive integer
    indicates a prior-list intrusion and a -1 indicates an extra-list
    intrusion):

    To only look at PLIs:               rec_mask = make_mask_only_pli2d(intrusions)

    To only look at XLIs:               rec_mask = make_mask_only_xli2d(intrusions)

    You can combine the repetition, PLI, and XLI masks to create a mask for
    all incorrect recalls.

    EXAMPLE:
    p_stops = p_stop_perc(recalls, subject, time, record_time, 12000, rec_mask)
    """
    if recalls is None:
        raise Exception('You must pass a recalls matrix.')
    elif subject is None:
        raise Exception('You must pass a subjects vector.')
    elif len(recalls) != len(subject):
        raise Exception(
            'recalls matrix must have the same number of rows as subjects.')
    if rec_mask is None:
        rec_mask = mask.make_clean_recalls_mask2d(recalls)
    # The three timing arguments must be passed together or not at all
    timing_args = [time, record_time, exit_time_thresh]
    timing_given = [arg is not None for arg in timing_args]
    if any(timing_given) and not all(timing_given):
        raise Exception(
            'You must pass a time matrix, a record_time scalar, and an exit_time_thresh scalar, or none of them.'
        )
    elif all(timing_given):
        marker = True
        if len(time) != len(recalls):
            raise Exception('time matrix needs to be same shape as recalls.')
    else:
        marker = False
    recalls = mask.mask_data(recalls, rec_mask)
    subjects = np.unique(subject)
    result = []
    for subj in subjects:
        stop = [0] * len(recalls[0])
        num = [0] * len(recalls[0])
        for subj_ind, subj_num in enumerate(subject):
            if subj == subj_num:
                if marker:
                    if last_nonzero(time[subj_ind]) is None:
                        continue
                    elif record_time - last_nonzero(
                            time[subj_ind]
                    ) > exit_time_thresh and record_time - last_nonzero(
                            time[subj_ind]) > max_irt(time[subj_ind]):
                        for n, rec in enumerate(recalls[subj_ind]):
                            if rec != 0:
                                num[n] += 1
                        for n, rec in enumerate(recalls[subj_ind][::-1]):
                            if rec != 0:
                                stop[len(recalls[0]) - n - 1] += 1
                                break
                    else:
                        continue
                else:
                    for n, rec in enumerate(recalls[subj_ind]):
                        if rec != 0:
                            num[n] += 1
                    for n, rec in enumerate(recalls[subj_ind][::-1]):
                        if rec != 0:
                            stop[len(recalls[0]) - n - 1] += 1
                            break
        total_num = sum(stop)
        total_denom = sum(num)
        result.append(total_num / float(total_denom))
    return result
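
A hedged usage sketch for p_stop_perc with the optional timing arguments omitted, so no trials are excluded; the data are toy values invented for illustration, and pybeh's mask helpers (make_clean_recalls_mask2d, mask_data) are assumed to be importable, as in the file above.

import numpy as np

recalls = np.array([[1, 3, 2, 0],   # serial positions recalled per trial
                    [4, 2, 0, 0]])
subjects = ['S01', 'S01']

p_stops = p_stop_perc(recalls=recalls, subject=subjects)
# p_stops holds one stopping probability per subject, computed here as the
# number of trial-final recalls divided by the total number of masked-in
# recalls.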
Example #8
def temp_fact(recalls=None, subjects=None, listLength=None, skip_first_n=0):
    """
    Returns the lag-based temporal clustering factor for each subject (Polyn, Norman, & Kahana, 2009).

    :param recalls: A trials x recalls matrix containing the serial positions (between 1 and listLength) of words
        recalled on each trial. Intrusions should appear as -1, and the matrix should be padded with zeros if the number
        of recalls differs by trial.
    :param subjects: A list/array containing identifiers (e.g. subject number) indicating which subject completed each
        trial.
    :param listLength: A positive integer indicating the number of items presented on each trial.
    :param skip_first_n: An integer indicating the number of recall transitions to ignore from the start of each recall
        period, for the purposes of calculating the clustering factor. This can be useful to avoid biasing your results,
        as early transitions often differ from later transitions in terms of their clustering. Note that the first n
        recalls will still count as already recalled words for the purposes of determining which transitions are
        possible. (DEFAULT=0)

    :return: An array containing the temporal clustering factor score for each subject (sorted by alphabetical order).
    """

    if recalls is None:
        raise Exception('You must pass a recalls matrix.')
    if subjects is None:
        raise Exception('You must pass a subjects vector.')
    if listLength is None:
        raise Exception('You must pass a list length.')
    if len(recalls) != len(subjects):
        raise Exception('The recalls matrix must have the same number of rows as the list of subjects.')
    if not isinstance(skip_first_n, int) or skip_first_n < 0:
        raise ValueError('skip_first_n must be a nonnegative integer.')

    # Convert recalls and subjects to numpy arrays if they are not arrays already
    recalls = np.array(recalls)
    subjects = np.array(subjects)

    # Initialize range for possible next recalls, based on list length
    possibles_range = range(1, listLength + 1)

    # Initialize arrays to store each participant's results
    usub = np.unique(subjects)
    total = np.zeros_like(usub, dtype=float)
    count = np.zeros_like(usub, dtype=float)

    # Identify locations of all correct recalls (not PLI, ELI, or repetition)
    clean_recalls_mask = np.array(make_clean_recalls_mask2d(recalls))

    # Calculate temporal factor score for each trial
    for i, trial_data in enumerate(recalls):
        seen = set()
        # Identify the current subject's index in usub to determine their position in the total and count arrays
        subj_ind = np.where(usub == subjects[i])[0][0]
        # Loop over the recalls on the current trial
        for j, serialpos in enumerate(trial_data[:-1]):
            seen.add(serialpos)
            # Only count transition if both the current and next recalls are valid
            if clean_recalls_mask[i, j] and clean_recalls_mask[i, j+1] and j >= skip_first_n:
                # Identify possible transitions
                possibles = np.array([abs(item - serialpos) for item in possibles_range if item not in seen])
                # Identify actual transition
                next_serialpos = trial_data[j + 1]
                actual = abs(next_serialpos - serialpos)
                # Find the proportion of transition lags that were larger than the actual transition
                ptile_rank = temp_percentile_rank(actual, possibles)
                # Add transition to the appropriate participant's score
                if ptile_rank is not None:
                    total[subj_ind] += ptile_rank
                    count[subj_ind] += 1

    # Find temporal factor scores as the participants' average transition scores
    count[count == 0] = np.nan
    final_data = total / count

    return final_data
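
A hedged usage sketch for temp_fact with toy data invented for illustration; it assumes numpy and the helpers used above (make_clean_recalls_mask2d, temp_percentile_rank) are importable from pybeh.

import numpy as np

recalls = [[1, 2, 3, 0],   # mostly adjacent forward transitions
           [4, 1, 3, 2]]
subjects = ['S01', 'S01']

tf = temp_fact(recalls=recalls, subjects=subjects, listLength=4)
# tf holds one temporal clustering factor per subject; scores near 1 mean
# transitions tended to land on neighboring serial positions, while 0.5
# indicates chance-level temporal clustering.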