def p_reject(rejects_matrix=None, subject=None, rec_mask=None, recalls=None):
    """
    P_REJECT  Computes probability of rejecting recalled items.

    p_rejects = p_reject(rejects_matrix, subject, rec_mask, recalls)

    INPUTS:
        rejects_matrix: a matrix whose elements indicate whether recalled items
                        were rejected or accepted as correct items in
                        externalized free recall (EFR). The rows of this matrix
                        should represent recalls made by a single subject on a
                        single trial. An element of the rejects matrix should be
                        equal to 1 if and only if that item was rejected.

        subject:        a column vector which indexes the rows of rejects_matrix
                        with a subject number (or other identifier). That is,
                        the recall trials of subject S should be located in
                        rejects_matrix(find(subjects==S), :)

        rec_mask:       if given, a logical matrix of the same shape as the
                        recalls matrix, which is false at positions (i, j) where
                        the value at recalls(i, j) should be excluded from the
                        calculation of the probability of rejection. If NOT
                        given, a standard clean recalls mask is used, which
                        excludes repeats, intrusions and empty cells.

        recalls:        a matrix of recalled serial positions, used only to
                        construct the default clean recalls mask when rec_mask
                        is not given.

    OUTPUTS:
        p_reject:       a vector of probabilities. Rows are indexed by subject.
    """
    if rejects_matrix is None:
        raise Exception('You must pass a rejects matrix.')
    if subject is None:
        raise Exception('You must pass a subjects vector.')
    if rec_mask is None:
        rec_mask = mask.make_clean_recalls_mask2d(recalls)
    if len(rejects_matrix) != len(subject):
        raise Exception('rejects matrix needs to be same length as subjects.')

    subjects = np.unique(subject)
    result = []
    for subj in subjects:
        denom = 0
        num = 0
        # Count masked-in recalls (denominator) and how many of them were rejected (numerator)
        for subj_ind, subj_num in enumerate(subject):
            if subj == subj_num:
                for index, item in enumerate(rec_mask[subj_ind]):
                    if item == 1:
                        denom += 1
                        if rejects_matrix[subj_ind][index] == 1:
                            num += 1
        if denom != 0:
            result.append(num / float(denom))
        else:
            result.append(0)
    return result
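
# Illustrative usage sketch for p_reject (not part of the toolbox itself). The toy
# matrices below are hypothetical: two EFR trials from one subject, with a mask
# supplied directly so no recalls matrix is needed.
def _example_p_reject():
    rejects = [[0, 1, 0],   # second recall on trial 1 was rejected
               [1, 0, 0]]   # first recall on trial 2 was rejected
    subjects = [1, 1]
    rec_mask = [[1, 1, 1],
                [1, 1, 0]]  # last position of trial 2 is excluded
    # Expected output: [0.4] -- two rejections out of five masked-in recalls
    return p_reject(rejects_matrix=rejects, subject=subjects, rec_mask=rec_mask)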
def sem_crp(recalls=None, recalls_itemnos=None, pres_itemnos=None, subjects=None,
            sem_sims=None, n_bins=10, listLength=None):
    """
    SEM_CRP  Conditional response probability as a function of semantic similarity bin.

    Similarity values from sem_sims are sorted and split into n_bins equally sized
    bins. For each subject, the CRP for a bin is the number of actual transitions
    falling in that bin divided by the number of possible transitions in that bin.
    Also returns the average similarity of actual transitions within each bin.
    """
    # Sanity checks
    if recalls_itemnos is None:
        raise Exception('You must pass a recalls-by-item-numbers matrix.')
    elif pres_itemnos is None:
        raise Exception('You must pass a presentations-by-item-numbers matrix.')
    elif sem_sims is None:
        raise Exception('You must pass a semantic similarity matrix.')
    elif subjects is None:
        raise Exception('You must pass a subjects vector.')
    elif listLength is None:
        raise Exception('You must pass a listLength')
    elif len(recalls_itemnos) != len(subjects):
        raise Exception('recalls matrix must have the same number of rows as subjects.')

    # Make sure that all input arrays and matrices are numpy arrays
    recalls = np.array(recalls, dtype=int)
    recalls_itemnos = np.array(recalls_itemnos, dtype=int)
    pres_itemnos = np.array(pres_itemnos, dtype=int)
    subjects = np.array(subjects)
    sem_sims = np.array(sem_sims)

    # Set the diagonal of the similarity matrix to nan
    np.fill_diagonal(sem_sims, np.nan)
    # Sort and split all similarities into equally sized bins
    all_sim = sem_sims.flatten()
    all_sim = np.sort(all_sim[~np.isnan(all_sim)])
    bins = np.array_split(all_sim, n_bins)
    bins = [b[0] for b in bins]
    # Convert the similarity matrix to bin numbers for easy bin lookup later
    bin_sims = np.digitize(sem_sims, bins) - 1

    # Convert item numbers to the corresponding indices of the similarity matrix by subtracting 1
    recalls_itemnos -= 1
    pres_itemnos -= 1

    usub = np.unique(subjects)
    bin_means = np.zeros((len(usub), n_bins))
    crp = np.zeros((len(usub), n_bins))
    # For each subject
    for i, subj in enumerate(usub):
        # Create a filter to select only the current subject's data
        subj_mask = subjects == subj
        subj_recalls = recalls[subj_mask]
        subj_rec_itemnos = recalls_itemnos[subj_mask]
        subj_pres_itemnos = pres_itemnos[subj_mask]

        # Create a trials x recalls matrix where entry (j, k) indicates whether the kth recall on trial j was correct
        clean_recalls_mask = np.array(make_clean_recalls_mask2d(subj_recalls))

        # Set up counts for the number of possible and actual transitions, plus the similarity of actual transitions
        actual = np.zeros(n_bins)
        poss = np.zeros(n_bins)
        val = np.zeros(n_bins)

        # For each of the current subject's trials
        for j, trial_recs in enumerate(subj_recalls):
            seen = set()
            # For each recall on the current trial
            for k, rec in enumerate(trial_recs[:-1]):
                seen.add(rec)
                # Only increment transition counts if the current and next recall are BOTH correct recalls
                if clean_recalls_mask[j, k] and clean_recalls_mask[j, k + 1]:
                    this_recno = subj_rec_itemnos[j, k]
                    next_recno = subj_rec_itemnos[j, k + 1]
                    # Look up the semantic similarity and its bin between the current and next recall
                    sim = sem_sims[this_recno, next_recno]
                    b = bin_sims[this_recno, next_recno]
                    actual[b] += 1
                    val[b] += sim
                    # Get a list of not-yet-recalled word numbers
                    poss_rec = [subj_pres_itemnos[j][x] for x in range(listLength) if x + 1 not in seen]
                    # Look up the similarity bins between the current recall and all possible correct recalls
                    poss_trans = np.unique([bin_sims[this_recno, itemno] for itemno in poss_rec])
                    for b in poss_trans:
                        poss[b] += 1

        # CRP is calculated as the number of actual transitions / number of possible ones
        crp[i, :] = actual / poss
        # Bin means are defined as the average similarity of actual transitions per bin
        bin_means[i, :] = val / actual

    return bin_means, crp
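
# Illustrative usage sketch for sem_crp (not part of the toolbox itself). The word
# pool, similarity values, and recall sequence below are hypothetical; a real
# analysis would use a full LSA/word2vec-style similarity matrix.
def _example_sem_crp():
    pres_itemnos = [[1, 2, 3, 4]]          # one trial, items 1-4 presented in order
    recalls = [[1, 3, 4, 0]]               # serial positions recalled (0 = padding)
    recalls_itemnos = [[1, 3, 4, 0]]       # item numbers of those recalls
    subjects = [1]
    sem_sims = [[1.0, 0.2, 0.5, 0.3],      # symmetric similarity matrix; the
                [0.2, 1.0, 0.4, 0.6],      # diagonal is ignored by sem_crp
                [0.5, 0.4, 1.0, 0.1],
                [0.3, 0.6, 0.1, 1.0]]
    # Returns (bin_means, crp), each 1 x 3; bins with no actual transitions come out as NaN
    return sem_crp(recalls=recalls, recalls_itemnos=recalls_itemnos,
                   pres_itemnos=pres_itemnos, subjects=subjects,
                   sem_sims=sem_sims, n_bins=3, listLength=4)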
def crl(recalls=None, times=None, subjects=None, listLength=None, lag_num=None, skip_first_n=0):
    """
    CRL  Inter-response time as a function of lag.

    Calculates the mean time it takes to move from one word position to another
    as a function of lag, returning lag-conditional response latency (CRL) times.

    Please note: if i and j are consecutive recalls, only real transitions are
    counted, meaning
    1) neither i nor j is an intrusion (i.e., not equal to -1)
    2) words that have already been recalled cannot be transitioned to or from

    FUNCTION:
        crl = crl(recalls, times, subjects, listLength, lag_num)

    INPUT ARGS:
        recalls      - recall positions
        times        - time associated with each recall
        subjects     - subject number associated with each trial
        listLength   - number of words in the list
        lag_num      - lag number to output
        skip_first_n - an integer indicating the number of recall transitions to
                       ignore from the start of the recall period, for the
                       purposes of calculating the CRL. This can be useful to
                       avoid biasing your results, as the first 2-3 transitions
                       are almost always temporally clustered with short IRTs.
                       (DEFAULT=0)

    OUTPUT ARGS:
        crl - a matrix of average CRL times by lag position for each subject
    """
    if recalls is None:
        raise Exception('You must pass a recalls matrix.')
    if times is None:
        raise Exception('You must pass a times vector.')
    if subjects is None:
        raise Exception('You must pass a subject vector.')
    if listLength is None:
        raise Exception('You must pass a list length.')
    if lag_num is None:
        lag_num = listLength - 1
    elif lag_num < 1 or lag_num >= listLength or not isinstance(lag_num, int):
        raise ValueError('Lag number needs to be a positive integer that is less than the list length.')
    if not isinstance(skip_first_n, int):
        raise ValueError('skip_first_n must be an integer.')

    # Convert inputs to numpy arrays
    recalls = np.array(recalls)
    times = np.array(times)
    subjects = np.array(subjects)
    # Get a list of unique subjects -- we will calculate a CRL for each
    usub = np.unique(subjects)
    # Number of possible lags = (listLength - 1) * 2 + 1; e.g. a length-24 list can have lags -23 through +23
    num_lags = 2 * listLength - 1
    # Initialize array to store the CRL for each subject (or other unique identifier)
    result = np.zeros((usub.size, num_lags))
    # Initialize arrays to store transition counts and summed inter-response times
    trans_count = np.empty(num_lags)
    time_count = np.empty(num_lags)

    # For each subject/unique identifier
    for i, subj in enumerate(usub):
        # Reset counts for each participant
        trans_count.fill(0)
        time_count.fill(0)
        cur_recs = recalls[subjects == subj]
        cur_times = times[subjects == subj]
        # Create a trials x recalls matrix where entry (j, k) indicates whether the kth recall on trial j was correct
        clean_recalls_mask = np.array(make_clean_recalls_mask2d(cur_recs))
        # For each trial that matches that identifier
        for j, trial_recs in enumerate(cur_recs):
            for k, rec in enumerate(trial_recs[:-1]):
                # Only increment transition and timing counts if the current and next recall are BOTH correct recalls
                if clean_recalls_mask[j, k] and clean_recalls_mask[j, k + 1] and k >= skip_first_n:
                    next_rec = trial_recs[k + 1]
                    trans = next_rec - rec
                    trans_time = cur_times[j, k + 1] - cur_times[j, k]
                    # Record the transition that was made and its IRT
                    trans_count[trans + listLength - 1] += 1
                    time_count[trans + listLength - 1] += trans_time

        result[i, :] = time_count / trans_count
        result[i, trans_count == 0] = np.nan

    return result[:, listLength - lag_num - 1:listLength + lag_num]
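
# Illustrative usage sketch for crl (not part of the toolbox itself). The recall
# positions and millisecond times below are hypothetical.
def _example_crl():
    recalls = [[1, 2, 4, 0],
               [3, 4, 1, 0]]
    times = [[1000, 1500, 2800, 0],
             [900, 1300, 3100, 0]]
    subjects = [1, 1]
    # One row (one subject), 7 columns for lags -3 .. +3;
    # e.g. lag +1 averages the 1->2 and 3->4 inter-response times.
    return crl(recalls=recalls, times=times, subjects=subjects, listLength=4)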
def crp(recalls=None, subjects=None, listLength=None, lag_num=None, skip_first_n=0):
    '''
    CRP  Conditional response probability as a function of lag (lag-CRP).

    lag_crps = crp(recalls_matrix, subjects, list_length, lag_num)

    INPUTS:
        recalls:      A 2D iterable whose elements are serial positions of
                      recalled items. The rows of this array should represent
                      recalls made by a single subject on a single trial.

        subjects:     A column vector which indexes the rows of "recalls" with a
                      subject number (or other identifier). The subject
                      identifiers should be repeated for each row of "recalls"
                      originating from the same subject.

        list_length:  A scalar indicating the number of serial positions in the
                      presented lists. Serial positions are assumed to run from
                      1:list_length.

        lag_num:      A scalar indicating the max number of lags to track.

        skip_first_n: An integer indicating the number of recall transitions to
                      ignore from the start of the recall period, for the
                      purposes of calculating the CRP. This can be useful to
                      avoid biasing your results, as the first 2-3 transitions
                      are almost always temporally clustered. Note that the
                      first n recalls will still count as already recalled words
                      for the purposes of determining which transitions are
                      possible. (DEFAULT=0)

    OUTPUTS:
        lag_crps:     A matrix of lag-CRP values. Each row contains the values
                      for one subject. It has as many columns as there are
                      possible transitions (i.e., the length of
                      (-list_length + 1) : (list_length - 1)). The center
                      column, corresponding to the "transition of length 0," is
                      guaranteed to be filled with NaNs. Any lag_crps element
                      which had no possible transitions for the input data for
                      that subject will also have a value of NaN.

                      For example, if list_length == 4, a row in lag_crps has 7
                      columns, corresponding to the transitions from -3 to +3:

                      lag-CRPs:    [ 0.1  0.2  0.3  NaN  0.3  0.1  0.0 ]
                      transitions:   -3   -2   -1    0   +1   +2   +3
    '''
    if recalls is None:
        raise Exception('You must pass a recalls matrix.')
    elif subjects is None:
        raise Exception('You must pass a subjects vector.')
    elif listLength is None:
        raise Exception('You must pass a list length.')
    elif len(recalls) != len(subjects):
        raise Exception('recalls matrix must have the same number of rows as subjects.')
    if lag_num is None:
        lag_num = listLength - 1
    elif lag_num < 1 or lag_num >= listLength or not isinstance(lag_num, int):
        raise ValueError('Lag number needs to be a positive integer that is less than the list length.')
    if not isinstance(skip_first_n, int):
        raise ValueError('skip_first_n must be an integer.')

    # Convert recalls and subjects to numpy arrays
    recalls = np.array(recalls)
    subjects = np.array(subjects)
    # Get a list of unique subjects -- we will calculate a CRP for each
    usub = np.unique(subjects)
    # Number of possible lags = (listLength - 1) * 2 + 1; e.g. a length-24 list can have lags -23 through +23
    num_lags = 2 * listLength - 1
    # Initialize array to store the CRP for each subject (or other unique identifier)
    result = np.zeros((usub.size, num_lags))
    # Initialize arrays to store transition counts
    actual = np.empty(num_lags)
    poss = np.empty(num_lags)

    # For each subject/unique identifier
    for i, subj in enumerate(usub):
        # Reset counts for each participant
        actual.fill(0)
        poss.fill(0)
        # Create a trials x recalls matrix where entry (j, k) indicates whether the kth recall on trial j was correct
        clean_recalls_mask = np.array(make_clean_recalls_mask2d(recalls[subjects == subj]))
        # For each trial that matches that identifier
        for j, trial_recs in enumerate(recalls[subjects == subj]):
            seen = set()
            for k, rec in enumerate(trial_recs[:-1]):
                seen.add(rec)
                # Only increment transition counts if the current and next recall are BOTH correct recalls
                if clean_recalls_mask[j][k] and clean_recalls_mask[j][k + 1] and k >= skip_first_n:
                    next_rec = trial_recs[k + 1]
                    # Count all possible transitions from the current recall to not-yet-recalled serial positions
                    pt = np.array([trans for trans in range(1 - rec, listLength + 1 - rec)
                                   if rec + trans not in seen], dtype=int)
                    poss[pt + listLength - 1] += 1
                    trans = next_rec - rec
                    # Record the actual transition that was made
                    actual[trans + listLength - 1] += 1

        result[i, :] = [a / p if p != 0 else np.nan for a, p in zip(actual, poss)]

    result[:, listLength - 1] = np.nan

    return result[:, listLength - lag_num - 1:listLength + lag_num]
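
# Illustrative usage sketch for crp (not part of the toolbox itself). The toy recall
# sequences below are hypothetical: two trials from one subject with list length 4.
def _example_crp():
    recalls = [[1, 2, 4, 0],
               [4, 3, 1, 0]]
    subjects = [1, 1]
    # One row per subject, 7 columns for lags -3 .. +3, with NaN guaranteed at lag 0
    return crp(recalls=recalls, subjects=subjects, listLength=4)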
def or_score(recalls=None, subjects=None, listLength=None, rec_mask=None):
    """
    OR_SCORE  Recall probability for either of a pair, conditional on their lag.

    Computes the OR scores between pairs of items as a function of lag,
    irrespective of their serial positions.

    or_scores = or_score(recalls_matrix, subjects, list_length, rec_mask)

    INPUTS:
        recalls_matrix: a matrix whose elements are serial positions of recalled
                        items. The rows of this matrix should represent recalls
                        made by a single subject on a single trial.

        subjects:       a column vector which indexes the rows of recalls_matrix
                        with a subject number (or other identifier). That is,
                        the recall trials of subject S should be located in
                        recalls_matrix(find(subjects==S), :)

        list_length:    a scalar indicating the number of serial positions in
                        the presented lists. Serial positions are assumed to run
                        from 1:list_length.

        rec_mask:       if given, a logical matrix the same shape as
                        recalls_matrix, true at positions for items to be
                        counted. Note that this mask does NOT need to exclude
                        repeats and intrusions, but it should include repeated
                        items if we're only interested in OR scores of
                        once-presented (1p) items in mixed lists.

    OUTPUTS:
        or_scores:      a matrix of OR scores, i.e. the probability of recalling
                        one item or the other from a pair. Its columns are
                        indexed by lag and its rows are indexed by subject.

    NOTES:
        Using the proper rec_mask is CRUCIAL for determining proper OR scores.
        For the typical serial position curve, items from the recency and
        primacy portions should be excluded, which is typically determined
        manually.
    """
    if recalls is None:
        raise Exception('You must pass a recall matrix.')
    elif listLength is None:
        raise Exception('You must pass a list length.')
    elif subjects is None:
        raise Exception('You must pass a subjects vector.')
    elif rec_mask is None:
        rec_mask = mask.make_clean_recalls_mask2d(recalls)
    elif len(rec_mask) != len(recalls):
        raise Exception('rec_mask needs to be same shape as recalls.')
    # Apply the mask so that excluded recalls do not contribute to the OR scores
    recalls = mask.mask_data(recalls, rec_mask)

    result = []
    subject = np.unique(subjects)
    for subj in subject:
        orscore_subj = []
        # For each lag, track how often either member of a pair separated by that lag was recalled
        for lag in range(1, listLength):
            track = [0] * (listLength - lag)
            n = 0
            for subj_ind, subj_num in enumerate(subjects):
                if subj == subj_num:
                    n += 1
                    for ind in range(len(track)):
                        if ind + 1 in recalls[subj_ind] or ind + 1 + lag in recalls[subj_ind]:
                            track[ind] += 1
            # Average over pairs and trials to get the OR score for this lag
            total = 0
            for val in track:
                total += val
            total = total / float(n * (listLength - lag))
            orscore_subj.append(total)
        result.append(orscore_subj)
    return result
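
# Illustrative usage sketch for or_score (not part of the toolbox itself). The toy
# data below are hypothetical; rec_mask is left as None so the default clean-recalls
# mask is constructed and applied.
def _example_or_score():
    recalls = [[1, 3, 0, 0],
               [2, 4, 0, 0]]
    subjects = [1, 1]
    # Returns one list per subject; entry [lag - 1] is the probability of recalling
    # either member of a pair of serial positions separated by that lag.
    return or_score(recalls=recalls, subjects=subjects, listLength=4)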
def dist_fact(rec_itemnos=None, pres_itemnos=None, subjects=None, dist_mat=None,
              is_similarity=False, skip_first_n=0):
    """
    Returns a clustering factor score for each subject, based on the provided
    distance metric (Polyn, Norman, & Kahana, 2009). Can also be used with a
    similarity matrix (e.g. LSA, word2vec) if is_similarity is set to True.

    :param rec_itemnos: A trials x recalls matrix containing the ID numbers
        (between 1 and N) of the items recalled on each trial. Extra-list
        intrusions should appear as -1, and the matrix should be padded with
        zeros if the number of recalls differs by trial.
    :param pres_itemnos: A trials x items matrix containing the ID numbers
        (between 1 and N) of the items presented on each trial.
    :param subjects: A list/array containing identifiers (e.g. subject number)
        indicating which subject completed each trial.
    :param dist_mat: An N x N matrix (where N is the number of words in the
        wordpool) defining either the distance or similarity between every pair
        of words in the wordpool. Whether dist_mat defines distance or
        similarity can be specified with the is_similarity parameter.
    :param is_similarity: If False, dist_mat is assumed to be a distance matrix.
        If True, dist_mat is instead treated as a similarity matrix (i.e.
        larger values correspond to smaller distances). (DEFAULT = False)
    :param skip_first_n: An integer indicating the number of recall transitions
        to ignore from the start of each recall period, for the purposes of
        calculating the clustering factor. This can be useful to avoid biasing
        your results, as early transitions often differ from later transitions
        in terms of their clustering. Note that the first n recalls will still
        count as already recalled words for the purposes of determining which
        transitions are possible. (DEFAULT = 0)

    :return: An array containing the clustering factor score for each subject
        (sorted by alphabetical order).
    """
    if rec_itemnos is None:
        raise Exception('You must pass a rec_itemnos matrix.')
    if pres_itemnos is None:
        raise Exception('You must pass a pres_itemnos matrix.')
    if subjects is None:
        raise Exception('You must pass a subjects vector.')
    if dist_mat is None:
        raise Exception('You must pass either a similarity matrix or a distance matrix.')
    if len(rec_itemnos) != len(subjects) or len(pres_itemnos) != len(subjects):
        raise Exception('The rec_itemnos and pres_itemnos matrices must have the same number of rows as the list of '
                        'subjects.')
    if not isinstance(skip_first_n, int) or skip_first_n < 0:
        raise ValueError('skip_first_n must be a nonnegative integer.')

    # Convert inputs to numpy arrays if they are not arrays already
    rec_itemnos = np.array(rec_itemnos)
    pres_itemnos = np.array(pres_itemnos)
    subjects = np.array(subjects)
    dist_mat = np.array(dist_mat)

    # Provide a warning if the user inputs a dist_mat that looks like a similarity matrix (scores on the diagonal are
    # large), but has left is_similarity as False
    if (not is_similarity) and np.nanmean(np.diagonal(dist_mat)) > np.nanmean(dist_mat):
        warnings.warn('It looks like you might be using a similarity matrix (e.g. LSA, word2vec) instead of a distance'
                      ' matrix, but you currently have is_similarity set to False. If you are using a similarity'
                      ' matrix, make sure to set is_similarity to True when running dist_fact().')

    # Initialize arrays to store each participant's results
    usub = np.unique(subjects)
    total = np.zeros_like(usub, dtype=float)
    count = np.zeros_like(usub, dtype=float)

    # Identify locations of all correct recalls (not PLI, ELI, or repetition)
    clean_recalls_mask = np.array(make_clean_recalls_mask2d(make_recalls_matrix(pres_itemnos, rec_itemnos)))

    # Calculate distance factor score for each trial
    for i, trial_data in enumerate(rec_itemnos):
        seen = set()
        # Identify the current subject's index in usub to determine their position in the total and count arrays
        subj_ind = np.where(usub == subjects[i])[0][0]
        # Loop over the recalls on the current trial
        for j, rec in enumerate(trial_data[:-1]):
            seen.add(rec)
            # Only count the transition if both the current and next recalls are valid
            if clean_recalls_mask[i, j] and clean_recalls_mask[i, j + 1] and j >= skip_first_n:
                # Identify the distance between the current recall and all valid recalls that could follow it
                possibles = np.array([dist_mat[rec - 1, poss_rec - 1] for poss_rec in pres_itemnos[i]
                                      if poss_rec not in seen])
                # Identify the distance between the current recall and the next
                actual = dist_mat[rec - 1, trial_data[j + 1] - 1]
                # Find the proportion of possible transitions that were larger than the actual transition
                ptile_rank = dist_percentile_rank(actual, possibles, is_similarity)
                # Add the transition to the appropriate participant's score
                if ptile_rank is not None:
                    total[subj_ind] += ptile_rank
                    count[subj_ind] += 1

    # Find distance factor scores as the participants' average transition percentile ranks
    count[count == 0] = np.nan
    final_data = total / count

    return final_data
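
# Illustrative usage sketch for dist_fact (not part of the toolbox itself). The
# 4-word distance matrix and recall data below are hypothetical.
def _example_dist_fact():
    pres_itemnos = [[1, 2, 3, 4]]
    rec_itemnos = [[1, 3, 4, 0]]          # item IDs recalled (0 = padding)
    subjects = [1]
    dist_mat = [[0.0, 1.0, 2.0, 3.0],     # pairwise distances between the 4 words
                [1.0, 0.0, 1.5, 2.5],
                [2.0, 1.5, 0.0, 0.5],
                [3.0, 2.5, 0.5, 0.0]]
    # Returns one clustering factor score per subject (here, a length-1 array)
    return dist_fact(rec_itemnos=rec_itemnos, pres_itemnos=pres_itemnos,
                     subjects=subjects, dist_mat=dist_mat)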
def p_stop_perc(recalls=None, subject=None, time=None, record_time=None, exit_time_thresh=None, rec_mask=None):
    """
    P_STOP_PERC  Probability of stopping recall.

    [p_stops, denoms] = p_stop_perc(recalls, time, record_time, exit_time_thresh, subject, rec_mask)

    INPUTS:
        recalls:          a matrix whose elements are serial positions of
                          recalled items. The rows of this matrix should
                          represent recalls made by a single subject on a
                          single trial.

        time:             a matrix whose elements are the millisecond times of
                          the recalled items. The rows of this matrix should
                          represent recalls made by a single subject on a
                          single trial.

        record_time:      the length, in milliseconds, of the recall period for
                          each trial.

        exit_time_thresh: a scalar, in ms, representing the time required
                          between the final recall in a trial and the end of
                          the recall period. If the final recall occurred less
                          than exit_time_thresh away from the end of the recall
                          period, the trial will not be used in the analysis
                          (the idea being that perhaps the subject simply ran
                          out of time but was not done recalling). A trial will
                          only be included if the final recall is greater than
                          exit_time_thresh away from the end of the recall
                          period AND the time between the final recall and the
                          end of the recall period is greater than all of the
                          inter-response times on that trial.
                          NOTE: If you do not want to exclude any trials based
                          on these criteria, do not pass values for time,
                          record_time, and exit_time_thresh.

        subject:          a column vector which indexes the rows of recalls
                          with a subject number (or other identifier). That is,
                          the recall trials of subject S should be located in
                          recalls(find(subjects==S))

        rec_mask:         a logical matrix the same shape as recalls. The mask
                          should be true for any item in the condition of
                          interest. If NOT given, a clean recalls mask is used
                          (i.e., only correct recalls will be analyzed).

    OUTPUTS:
        p_stops:          a column vector of stopping probabilities with rows
                          representing subjects.

        denoms:           a column vector of denominator values that went into
                          the probability calculations.

    Notes about the mask:
        You can use the mask input to analyze different types of recalls (e.g.,
        correct recalls, repetitions, intrusions). There are masking functions
        that will create these.

        To only look at correct responses:
            mask = make_clean_recalls_mask2d(recalls)
        To only look at repetitions:
            mask = make_mask_only_reps2d(recalls)

        To look at intrusions, you can create a mask using an intrusions matrix
        (which must be the same size as recalls, where a positive integer
        indicates a prior-list intrusion, and a -1 indicates an extra-list
        intrusion).
        To only look at PLIs:
            mask = make_mask_only_pli2d(intrusions)
        To only look at XLIs:
            mask = make_mask_only_xli2d(intrusions)

        You can use the repetition mask, PLI mask, and XLI mask to create a
        mask for all incorrect recalls.

    EXAMPLE:
        [p_stops, denom] = p_stop_perc(recalls, time_mat, rec_length, 12000, subjects, mask)
    """
    if recalls is None:
        raise Exception('You must pass a recalls matrix.')
    elif subject is None:
        raise Exception('You must pass a subjects vector.')
    elif len(recalls) != len(subject):
        raise Exception('recalls matrix must have the same number of rows as subjects.')
    if rec_mask is None:
        rec_mask = mask.make_clean_recalls_mask2d(recalls)
    # Either all three timing inputs must be given, or none of them
    timing_args = [time is not None, record_time is not None, exit_time_thresh is not None]
    if any(timing_args) and not all(timing_args):
        raise Exception('You must pass a time matrix, a record_time scalar, and an exit_time_thresh scalar, '
                        'or all must be empty.')
    elif all(timing_args):
        marker = True
        if len(time) != len(recalls):
            raise Exception('time matrix needs to be same shape as recalls')
    else:
        marker = False

    recalls = mask.mask_data(recalls, rec_mask)
    subjects = np.unique(subject)
    result = []
    for subj in subjects:
        # For each output position, count trials that stopped there (stop) and recalls made there (num)
        stop = [0] * len(recalls[0])
        num = [0] * len(recalls[0])
        for subj_ind, subj_num in enumerate(subject):
            if subj == subj_num:
                if marker:
                    # Skip trials where the subject may simply have run out of time
                    if last_nonzero(time[subj_ind]) is None:
                        continue
                    elif (record_time - last_nonzero(time[subj_ind]) > exit_time_thresh
                          and record_time - last_nonzero(time[subj_ind]) > max_irt(time[subj_ind])):
                        for n, rec in enumerate(recalls[subj_ind]):
                            if rec != 0:
                                num[n] += 1
                        for n, rec in enumerate(recalls[subj_ind][::-1]):
                            if rec != 0:
                                stop[len(recalls[0]) - n - 1] += 1
                                break
                    else:
                        continue
                else:
                    for n, rec in enumerate(recalls[subj_ind]):
                        if rec != 0:
                            num[n] += 1
                    for n, rec in enumerate(recalls[subj_ind][::-1]):
                        if rec != 0:
                            stop[len(recalls[0]) - n - 1] += 1
                            break
        total_num = 0
        total_denom = 0
        for item in stop:
            total_num += item
        for item in num:
            total_denom += item
        result.append(total_num / float(total_denom))
    return result
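
# Illustrative usage sketch for p_stop_perc (not part of the toolbox itself). The
# toy data below are hypothetical; no timing information is passed, so no trials
# are excluded on the basis of exit time.
def _example_p_stop_perc():
    recalls = [[1, 2, 3, 0],
               [4, 2, 0, 0]]
    subjects = [1, 1]
    # Stopping probability: number of trials ending at each output position,
    # summed and divided by the total number of recalls made.
    return p_stop_perc(recalls=recalls, subject=subjects)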
def temp_fact(recalls=None, subjects=None, listLength=None, skip_first_n=0):
    """
    Returns the lag-based temporal clustering factor for each subject
    (Polyn, Norman, & Kahana, 2009).

    :param recalls: A trials x recalls matrix containing the serial positions
        (between 1 and listLength) of words recalled on each trial. Intrusions
        should appear as -1, and the matrix should be padded with zeros if the
        number of recalls differs by trial.
    :param subjects: A list/array containing identifiers (e.g. subject number)
        indicating which subject completed each trial.
    :param listLength: A positive integer indicating the number of items
        presented on each trial.
    :param skip_first_n: An integer indicating the number of recall transitions
        to ignore from the start of each recall period, for the purposes of
        calculating the clustering factor. This can be useful to avoid biasing
        your results, as early transitions often differ from later transitions
        in terms of their clustering. Note that the first n recalls will still
        count as already recalled words for the purposes of determining which
        transitions are possible. (DEFAULT=0)

    :return: An array containing the temporal clustering factor score for each
        subject (sorted by alphabetical order).
    """
    if recalls is None:
        raise Exception('You must pass a recalls matrix.')
    if subjects is None:
        raise Exception('You must pass a subjects vector.')
    if listLength is None:
        raise Exception('You must pass a list length.')
    if len(recalls) != len(subjects):
        raise Exception('The recalls matrix must have the same number of rows as the list of subjects.')
    if not isinstance(skip_first_n, int) or skip_first_n < 0:
        raise ValueError('skip_first_n must be a nonnegative integer.')

    # Convert recalls and subjects to numpy arrays if they are not arrays already
    recalls = np.array(recalls)
    subjects = np.array(subjects)
    # Initialize the range of possible next recalls, based on list length
    possibles_range = range(1, listLength + 1)
    # Initialize arrays to store each participant's results
    usub = np.unique(subjects)
    total = np.zeros_like(usub, dtype=float)
    count = np.zeros_like(usub, dtype=float)
    # Identify locations of all correct recalls (not PLI, ELI, or repetition)
    clean_recalls_mask = np.array(make_clean_recalls_mask2d(recalls))

    # Calculate temporal factor score for each trial
    for i, trial_data in enumerate(recalls):
        seen = set()
        # Identify the current subject's index in usub to determine their position in the total and count arrays
        subj_ind = np.where(usub == subjects[i])[0][0]
        # Loop over the recalls on the current trial
        for j, serialpos in enumerate(trial_data[:-1]):
            seen.add(serialpos)
            # Only count the transition if both the current and next recalls are valid
            if clean_recalls_mask[i, j] and clean_recalls_mask[i, j + 1] and j >= skip_first_n:
                # Identify possible transition lags
                possibles = np.array([abs(item - serialpos) for item in possibles_range if item not in seen])
                # Identify the actual transition lag
                next_serialpos = trial_data[j + 1]
                actual = abs(next_serialpos - serialpos)
                # Find the proportion of possible transition lags that were larger than the actual transition
                ptile_rank = temp_percentile_rank(actual, possibles)
                # Add the transition to the appropriate participant's score
                if ptile_rank is not None:
                    total[subj_ind] += ptile_rank
                    count[subj_ind] += 1

    # Find temporal factor scores as the participants' average transition scores
    count[count == 0] = np.nan
    final_data = total / count

    return final_data
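
# Illustrative usage sketch for temp_fact (not part of the toolbox itself). The toy
# recall sequences below are hypothetical; scores near 1 indicate strong temporal
# clustering, while chance performance sits around 0.5.
def _example_temp_fact():
    recalls = [[1, 2, 3, 0],
               [4, 3, 1, 0]]
    subjects = [1, 1]
    # Returns one temporal clustering factor score per subject
    return temp_fact(recalls=recalls, subjects=subjects, listLength=4)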