def temporal_pca(train_data, test_data, train_half=1, test_half=2,
                 stories=None, subjects=None, hemisphere=None,
                 save_prefix=None, **kwargs):
    """Fit PCA on transposed training data and transform both data halves.

    Parameters
    ----------
    train_data, test_data : dict
        Nested dictionaries indexed as ``data[story][subject][hemi]``; each
        leaf must support ``.T`` (training data) and be accepted by the
        fitted PCA's ``transform`` method.
    train_half, test_half : int
        Labels forwarded to ``pca_transform`` as ``half`` (used there only
        to build output file names).
    stories, subjects, hemisphere : optional
        Key selections passed to ``check_keys``; ``None`` selects all keys.
    save_prefix : str, optional
        When given, ``'-train'`` / ``'-test'`` is appended and forwarded to
        ``pca_transform``. When ``None`` (the default) nothing is appended
        and ``None`` is forwarded, which disables saving there.
    **kwargs
        Forwarded to ``pca_fit``.

    Returns
    -------
    tuple
        ``(train_transformed, test_transformed)`` as returned by
        ``pca_transform``.
    """
    # Transpose time-series training data
    train_data_t = {}

    # By default grab all stories
    stories = check_keys(train_data, keys=stories)
    for story in stories:
        train_data_t[story] = {}

        # By default just grab all subjects
        subject_list = check_keys(train_data[story], keys=subjects,
                                  subkey=story)
        for subject in subject_list:
            train_data_t[story][subject] = {}

            # For simplicity we just assume same hemis across
            # stories/subjects
            hemis = check_keys(train_data[story][subject], keys=hemisphere)
            for hemi in hemis:
                train_data_t[story][subject][hemi] = (
                    train_data[story][subject][hemi].T)

    # Fit PCA on the transposed time series to get the transformations
    transforms = pca_fit(train_data_t, stories=stories,
                         hemisphere=hemisphere, **kwargs)

    # Only build suffixed prefixes when a prefix was actually supplied;
    # concatenating onto the default None would raise a TypeError.
    if save_prefix is not None:
        train_prefix = save_prefix + '-train'
        test_prefix = save_prefix + '-test'
    else:
        train_prefix = test_prefix = None

    # Apply transformations to training data
    train_transformed = pca_transform(train_data, transforms,
                                      half=train_half, stories=stories,
                                      subjects=subjects,
                                      hemisphere=hemisphere,
                                      save_prefix=train_prefix)
    print("Finished applying tPCA transformations to training data")

    # Apply transformations to test data
    test_transformed = pca_transform(test_data, transforms,
                                     half=test_half, stories=stories,
                                     subjects=subjects,
                                     hemisphere=hemisphere,
                                     save_prefix=test_prefix)
    print("Finished applying tPCA transformations to test data")

    return train_transformed, test_transformed
def target_wsfc(data, targets, stories=None, subjects=None,
                hemisphere=None, zscore_fcs=True):
    """Correlate each subject's data with that same subject's targets.

    For every subject, the columns of ``data[story][subject][hemi]`` are
    correlated with the columns of ``targets[story][subject]`` using
    ``np.corrcoef``; only the data-by-target cross-correlation block is
    kept.

    Parameters
    ----------
    data : dict
        Nested dictionary indexed as ``data[story][subject][hemi]``.
    targets : dict
        Nested dictionary indexed as ``targets[story][subject]``.
    stories, subjects, hemisphere : optional
        Key selections passed to ``check_keys``; ``None`` selects all keys.
    zscore_fcs : bool
        If True, NaNs are replaced via ``np.nan_to_num`` and the
        connectivities are z-scored along the target axis.

    Returns
    -------
    dict
        ``target_wsfcs[story][subject][hemi]`` holding one connectivity
        matrix per subject.
    """
    # By default grab all stories
    stories = check_keys(data, keys=stories)

    target_wsfcs = {}
    for story in stories:

        target_wsfcs[story] = {}

        # By default just grab all subjects
        subject_list = check_keys(data[story], keys=subjects, subkey=story)

        for subject in subject_list:
            target_wsfcs[story][subject] = {}

        # Stack targets in third dimension
        target_stack = np.dstack(
            [targets[story][subject] for subject in subject_list])

        # By default grab both hemispheres
        hemis = check_keys(data[story][subject_list[0]], keys=hemisphere)

        # Get for specified hemisphere(s)
        for hemi in hemis:

            # Grab ROI data and targets
            data_stack = np.dstack(
                [data[story][subject][hemi] for subject in subject_list])

            # Compute WSFCs between ROI and targets
            wsfcs = []
            for s in range(data_stack.shape[2]):
                d = data_stack[..., s].shape[1]
                # corrcoef returns the full (d + targets) square matrix;
                # keep only the data-by-target block
                wsfc = np.corrcoef(data_stack[..., s].T,
                                   target_stack[..., s].T)[:d, d:]
                wsfcs.append(np.expand_dims(wsfc, 0))
            wsfcs = np.vstack(wsfcs)

            # Optionally z-score across targets
            if zscore_fcs:
                wsfcs = zscore(np.nan_to_num(wsfcs), axis=2)

            for s, subject in enumerate(subject_list):
                target_wsfcs[story][subject][hemi] = wsfcs[s]

        print(f"Finished computing target WSFCs for story '{story}'")

    # The original returned an undefined name here (NameError)
    return target_wsfcs
def srm_project(train_data, test_data, targets, srms, target_fc=None,
                train_half=1, test_half=2, stories=None, subjects=None,
                hemisphere=None, zscore_projected=True, save_prefix=None):
    """Project test data using projections derived from training FCs.

    Connectivities are computed from ``train_data`` and ``targets`` with
    ``target_fc``; each subject's connectivity matrix is handed to
    ``srms[hemi].transform_subject`` to obtain a projection, and the
    subject's test data is multiplied by that projection.

    Parameters
    ----------
    train_data, test_data : dict
        Nested dictionaries indexed as ``data[story][subject][hemi]``.
    targets : dict
        Targets forwarded to ``target_fc``.
    srms : dict
        Maps hemisphere to an object exposing ``transform_subject``
        (presumably fitted SRM models -- confirm against caller).
    target_fc : callable, optional
        Function computing the connectivities, called as
        ``target_fc(train_data, targets, stories=..., subjects=...,
        hemisphere=...)``. Defaults to ``target_isfc``; the default is
        resolved at call time so this function does not depend on
        ``target_isfc`` being defined earlier in the module.
    train_half : int
        Accepted for signature compatibility; not used in the body.
    test_half : int
        Used only in the output file name.
    stories, subjects, hemisphere : optional
        Key selections passed to ``check_keys``; ``None`` selects all keys.
    zscore_projected : bool
        If True, z-score the projected data along axis 0.
    save_prefix : str, optional
        If truthy, each projected array is saved under ``data/``.

    Returns
    -------
    dict
        ``data_projected[story][subject][hemi]`` with the projected data.
    """
    # Resolve the default lazily (a def-time default would require
    # target_isfc to already exist when this function is defined)
    if target_fc is None:
        target_fc = target_isfc

    # Compute FCs for projecting via connectivity
    target_fcs = target_fc(train_data, targets, stories=stories,
                           subjects=subjects, hemisphere=hemisphere)

    # By default grab all stories
    stories = check_keys(test_data, keys=stories)

    data_projected = {}
    for story in stories:

        data_projected[story] = {}

        # By default just grab all subjects
        subject_list = check_keys(test_data[story], keys=subjects,
                                  subkey=story)

        for subject in subject_list:

            data_projected[story][subject] = {}

            hemis = check_keys(test_data[story][subject], keys=hemisphere)

            for hemi in hemis:

                # Find new projection into shared space based on FCs
                projection = srms[hemi].transform_subject(
                    target_fcs[story][subject][hemi])

                # Project left-out test data into shared space
                projected = test_data[story][subject][hemi].dot(projection)

                # Optionally z-score projected output data
                if zscore_projected:
                    projected = zscore(projected, axis=0)

                data_projected[story][subject][hemi] = projected

                if save_prefix:
                    save_fn = (f'data/{subject}_task-{story}_'
                               f'half-{test_half}_{save_prefix}-test_{hemi}.npy')
                    np.save(save_fn, projected)

    return data_projected
def target_isfc(data, targets, stories=None, subjects=None,
                hemisphere=None, zscore_fcs=True):
    """Compute ISFCs between subject data and a set of target series.

    Parameters
    ----------
    data : dict
        Nested dictionary indexed as ``data[story][subject][hemi]``.
    targets : dict
        Nested dictionary indexed as ``targets[story][subject]``.
    stories, subjects, hemisphere : optional
        Key selections passed to ``check_keys``; ``None`` selects all keys.
    zscore_fcs : bool
        If True, NaNs are replaced via ``np.nan_to_num`` and the ISFCs are
        z-scored along axis 2.

    Returns
    -------
    dict
        ``result[story][subject][hemi]`` holding the ISFC slice for each
        subject (the ``isfc`` output indexed by subject position).
    """
    story_names = check_keys(data, keys=stories)

    fcs_by_story = {}
    for story in story_names:

        # Default to every subject available for this story
        subject_ids = check_keys(data[story], keys=subjects, subkey=story)

        # One (initially empty) hemisphere dict per subject
        story_fcs = {sid: {} for sid in subject_ids}
        fcs_by_story[story] = story_fcs

        # Subjects go along the third dimension of the target stack
        stacked_targets = np.dstack(
            [targets[story][sid] for sid in subject_ids])

        # Hemispheres are looked up on the first subject only
        hemi_names = check_keys(data[story][subject_ids[0]],
                                keys=hemisphere)

        for hemi in hemi_names:

            # Subjects along the third dimension here as well
            stacked_data = np.dstack(
                [data[story][sid][hemi] for sid in subject_ids])

            fcs = isfc(stacked_data, targets=stacked_targets)

            if zscore_fcs:
                fcs = zscore(np.nan_to_num(fcs), axis=2)

            # Hand each subject their own slice of the result
            for position, sid in enumerate(subject_ids):
                story_fcs[sid][hemi] = fcs[position]

        print(f"Finished computing target ISFCs for story '{story}'")

    return fcs_by_story
def parcel_means(data, atlas, parcel_labels=None, stories=None,
                 subjects=None):
    """Average the data columns belonging to each atlas parcel.

    Parameters
    ----------
    data : dict
        Nested dictionary indexed as ``data[story][subject][hemi]``; the
        leaves are indexed as ``[:, mask]``.
    atlas : dict
        Maps ``'lh'`` / ``'rh'`` to an array compared against each parcel
        label to build a column mask.
    parcel_labels : dict
        Maps ``'lh'`` / ``'rh'`` to an iterable of labels. Although the
        default is ``None``, the body indexes it, so a value is required.
    stories, subjects : optional
        Key selections passed to ``check_keys``; ``None`` selects all keys.

    Returns
    -------
    dict
        ``result[story][subject]``: one column per parcel, left-hemisphere
        parcels first, then right-hemisphere parcels.
    """
    story_names = check_keys(data, keys=stories)

    means_by_story = {}
    for story in story_names:

        means_by_story[story] = {}

        # Default to every subject available for this story
        subject_ids = check_keys(data[story], keys=subjects, subkey=story)

        for sid in subject_ids:

            per_hemi = []
            for hemi in ['lh', 'rh']:
                hemi_data = data[story][sid][hemi]

                # One column vector of row-wise means per parcel
                columns = [
                    np.mean(hemi_data[:, atlas[hemi] == label],
                            axis=1)[:, np.newaxis]
                    for label in parcel_labels[hemi]
                ]
                per_hemi.append(np.hstack(columns))

            # Left and right hemisphere parcels side by side
            combined = np.hstack(per_hemi)
            assert combined.shape[1] == (len(parcel_labels['lh']) +
                                         len(parcel_labels['rh']))

            means_by_story[story][sid] = combined

        print(f"Finished computing parcel means for '{story}'")

    return means_by_story
def pca_transform(data, transforms, half=1, stories=None, subjects=None,
                  hemisphere=None, zscore_transformed=True,
                  save_prefix=None):
    """Apply per-hemisphere fitted transforms to every subject's data.

    Parameters
    ----------
    data : dict
        Nested dictionary indexed as ``data[story][subject][hemi]``.
    transforms : dict
        Maps hemisphere to an object exposing ``transform``.
    half : int
        Used only in the output file name.
    stories, subjects, hemisphere : optional
        Key selections passed to ``check_keys``; ``None`` selects all keys.
    zscore_transformed : bool
        If True, z-score the transformed data along axis 0.
    save_prefix : str, optional
        If truthy, each transformed array is saved under ``data/``.

    Returns
    -------
    dict
        ``result[story][subject][hemi]`` with the transformed data.
    """
    story_names = check_keys(data, keys=stories)

    output = {}
    for story in story_names:
        output[story] = {}

        # Default to every subject available for this story
        subject_ids = check_keys(data[story], keys=subjects, subkey=story)

        for sid in subject_ids:
            subject_out = output[story][sid] = {}

            hemi_names = check_keys(data[story][sid], keys=hemisphere)

            for hemi in hemi_names:
                result = transforms[hemi].transform(data[story][sid][hemi])

                if zscore_transformed:
                    result = zscore(result, axis=0)

                subject_out[hemi] = result

                # Saving is opt-in through a non-empty prefix
                if save_prefix:
                    out_path = (f'data/{sid}_task-{story}_'
                                f'half-{half}_{save_prefix}_{hemi}.npy')
                    np.save(out_path, result)

    return output
def stack_subjects(data, subjects=None, hemisphere='lh'):
    """Stack one hemisphere's data across subjects along the third axis.

    Parameters
    ----------
    data : dict
        Dictionary indexed as ``data[subject][hemisphere]``.
    subjects : optional
        Key selection passed to ``check_keys``; ``None`` selects all keys.
    hemisphere : str
        Hemisphere key to extract from each subject.

    Returns
    -------
    numpy.ndarray
        Result of ``np.dstack`` over the selected subjects, with one slice
        per subject along axis 2.
    """
    subject_ids = check_keys(data, keys=subjects)

    per_subject = [data[sid][hemisphere] for sid in subject_ids]
    stacked = np.dstack(per_subject)

    # One slice per subject along the third axis
    assert stacked.shape[2] == len(subject_ids)

    return stacked
def parcel_srm(data, atlas, k=3, parcel_labels=None, stories=None,
               subjects=None):
    """Reduce each parcel to ``k`` features with an SRM fit on connectivity.

    For every story, hemisphere and parcel, an ``SRM(features=k)`` is fit
    on the subjects' parcel-restricted target ISFCs, and the resulting
    ``srm.w_`` matrices are applied to each subject's parcel time series.

    Parameters
    ----------
    data : dict
        Nested dictionary indexed as ``data[story][subject][hemi]``.
    atlas : dict
        Maps ``'lh'`` / ``'rh'`` to an array compared against each parcel
        label to build a mask.
    k : int
        Number of SRM features per parcel.
    parcel_labels : dict
        Maps ``'lh'`` / ``'rh'`` to an iterable of labels. Although the
        default is ``None``, the body indexes it, so a value is required.
    stories, subjects : optional
        Key selections passed to ``check_keys``; ``None`` selects all keys.

    Returns
    -------
    dict
        ``result[story][subject]``: the subject's transformed parcel data,
        ``k`` columns per parcel with left-hemisphere parcels first.
    """
    story_names = check_keys(data, keys=stories)

    # First compute mean time series for all target parcels
    targets = parcel_means(data, atlas, parcel_labels=parcel_labels,
                           stories=story_names, subjects=subjects)

    # Then the ISFCs between the data and those targets
    target_fcs = target_isfc(data, targets, stories=story_names,
                             subjects=subjects)

    n_parcels = len(parcel_labels['lh']) + len(parcel_labels['rh'])

    reduced = {}
    for story in story_names:

        reduced[story] = {}

        # Default to every subject available for this story
        subject_ids = check_keys(data[story], keys=subjects, subkey=story)

        per_hemi = []
        for hemi in ['lh', 'rh']:

            per_parcel = []
            for label in parcel_labels[hemi]:
                in_parcel = atlas[hemi] == label

                # Parcel rows of the FCs and parcel columns of the data
                parcel_fcs = [target_fcs[story][sid][hemi][in_parcel, :]
                              for sid in subject_ids]
                parcel_ts = [data[story][sid][hemi][:, in_parcel]
                             for sid in subject_ids]

                # Fresh SRM per parcel, trained on the connectivities
                srm = SRM(features=k)
                srm.fit(np.nan_to_num(parcel_fcs))

                # Apply each subject's learned weights to their series
                # and put subjects along the third axis
                per_parcel.append(np.dstack(
                    [ts.dot(w) for ts, w in zip(parcel_ts, srm.w_)]))

                print(f"Finished SRM for {hemi} parcel "
                      f"{label} in '{story}'")

            per_hemi.append(np.hstack(per_parcel))

        # Left and right hemisphere parcels side by side
        combined = np.hstack(per_hemi)
        assert combined.shape[1] == n_parcels * k
        assert combined.shape[2] == len(subject_ids)

        # Split the third axis back into one array per subject
        subject_slices = np.dsplit(combined, combined.shape[2])
        for sid, subject_slice in zip(subject_ids, subject_slices):
            reduced[story][sid] = np.squeeze(subject_slice)

        print(f"Finished applying cSRM to parcels for '{story}'")

    return reduced
def vertex_isc(data, threshold=.2, stories=None, subjects=None,
               half=1, save_iscs=False):
    """Keep only the vertices whose mean ISC reaches a threshold.

    Mean ISCs are computed per story and hemisphere with
    ``isc(..., summary_statistic='mean')``, concatenated across
    hemispheres, averaged across stories (via ``arctanh`` / ``tanh``), and
    thresholded to build a mask that is applied to every subject's data.

    Parameters
    ----------
    data : dict
        Nested dictionary indexed as ``data[story][subject][hemi]`` with
        hemisphere keys ``'lh'`` and ``'rh'``.
    threshold : float
        Vertices with mean ISC ``>= threshold`` are kept. The value also
        appears in the saved file names.
    stories, subjects : optional
        Key selections passed to ``check_keys``; ``None`` selects all keys.
    half : int
        Used only in the output file names.
    save_iscs : bool
        If True, save the per-story ISCs and the across-story mean ISCs,
        one file per hemisphere, under ``data/``.

    Returns
    -------
    dict
        ``masked_data[story][subject]``: left- and right-hemisphere columns
        passing the mask, concatenated horizontally.
    """
    # By default grab all stories
    stories = check_keys(data, keys=stories)

    hemi_names = ['lh', 'rh']
    vertex_iscs = {}
    story_subjects = {}
    n_lh = None
    for story in stories:

        # By default just grab all subjects; remember the list per story
        # so masking below uses the right subjects for each story
        subject_list = check_keys(data[story], keys=subjects, subkey=story)
        story_subjects[story] = subject_list

        hemi_stack = []
        for hemi in hemi_names:

            # Stack subjects in third dimension
            data_stack = np.dstack(
                [data[story][subject][hemi] for subject in subject_list])

            # Compute mean ISCs for this story and hemisphere
            iscs = isc(data_stack, summary_statistic='mean')

            # Optionally save ISCs
            if save_iscs:
                save_fn = (f'data/{story}_half-{half}_vertex-iscs_'
                           f'thresh-{threshold}_{hemi}.npy')
                np.save(save_fn, iscs)

            hemi_stack.append(iscs)

        # Width of the left hemisphere, used to split the combined mask
        # (assumes isc returns a 1-D per-vertex array -- TODO confirm)
        n_lh = len(hemi_stack[0])

        # Stack left and right hemispheres
        vertex_iscs[story] = np.hstack(hemi_stack)

        print(f"Finished computing vertex-wise ISCs for '{story}'")

    # Find the average ISCs across all stories (with Fisher Z)
    mean_iscs = np.tanh(
        np.mean([np.arctanh(vertex_iscs[story]) for story in stories],
                axis=0))

    # Optionally save the mean ISCs, one file per hemisphere (previously
    # the last story's right-hemisphere ISCs were written instead)
    if save_iscs:
        mean_by_hemi = (mean_iscs[:n_lh], mean_iscs[n_lh:])
        for hemi, hemi_mean in zip(hemi_names, mean_by_hemi):
            save_fn = (f'data/mean_half-{half}_vertex-iscs_'
                       f'thresh-{threshold}_{hemi}.npy')
            np.save(save_fn, hemi_mean)

    # Get vertices with mean ISC exceeding threshold
    isc_mask = mean_iscs >= threshold
    n_mask = np.sum(isc_mask)
    mask_lh = isc_mask[:n_lh]
    mask_rh = isc_mask[n_lh:]

    # Grab vertex time-series in ISC mask
    masked_data = {}
    for story in stories:
        masked_data[story] = {}
        for subject in story_subjects[story]:

            # Mask and recombine hemispheres
            masked = np.hstack((data[story][subject]['lh'][:, mask_lh],
                                data[story][subject]['rh'][:, mask_rh]))
            assert masked.shape[1] == n_mask
            masked_data[story][subject] = masked

    print("Finished computing ISC-based targets "
          f"({n_mask} vertices at threshold r = {threshold})")

    return masked_data
def pca_fit(target_fcs, stories=None, subjects=None, hemisphere=None,
            k=360, n_iter=10, half=1, save_prefix=None):
    """Fit one PCA per hemisphere on matrices pooled across subjects.

    Each subject's matrices are collected across stories and averaged when
    there is more than one; the transposed per-subject matrices are then
    stacked vertically and passed to ``PCA.fit``.

    Parameters
    ----------
    target_fcs : dict
        Nested dictionary indexed as ``target_fcs[story][subject][hemi]``.
    stories, subjects, hemisphere : optional
        Key selections passed to ``check_keys``; ``None`` selects all keys.
    k : int
        Number of PCA components.
    n_iter : int
        Accepted for signature compatibility; not used in the body.
    half : int
        Used only in the output file name.
    save_prefix : str, optional
        If truthy, the dict of fitted PCA objects is written with
        ``np.save`` under ``data/``.

    Returns
    -------
    dict
        Maps each hemisphere to its fitted ``PCA`` object.
    """
    story_names = check_keys(target_fcs, keys=stories)

    # Gather every matrix per subject and hemisphere, across stories
    collected = {}
    for story in story_names:

        subject_ids = check_keys(target_fcs[story], keys=subjects,
                                 subkey=story)

        for sid in subject_ids:

            # Hemispheres are assumed identical across stories/subjects;
            # the last value of `hemis` is reused by the loops below
            hemis = check_keys(target_fcs[story][sid], keys=hemisphere)

            for hemi in hemis:
                per_hemi = collected.setdefault(sid, {})
                per_hemi.setdefault(hemi, []).append(
                    target_fcs[story][sid][hemi])

    # Collapse each list to one matrix (mean when there are several)
    all_subjects = list(collected.keys())
    for sid in all_subjects:
        for hemi in hemis:
            matrices = collected[sid][hemi]
            if len(matrices) > 1:
                collected[sid][hemi] = np.mean(matrices, axis=0)
            else:
                collected[sid][hemi] = matrices[0]

    # One PCA per hemisphere on the vertically stacked transposes
    transforms = {}
    for hemi in hemis:

        pca = PCA(n_components=k)

        stacked = np.vstack(
            [collected[sid][hemi].T for sid in all_subjects])

        start = time()
        pca.fit(stacked)
        print(f"Finished fitting PCA after {time() - start:.1f} seconds")

        transforms[hemi] = pca

    if save_prefix:
        np.save(f'data/half-{half}_{save_prefix}_pca.npy', transforms)

    return transforms
def srm_fit(target_fcs, stories=None, subjects=None, hemisphere=None,
            k=360, n_iter=10, half=1, save_prefix=None):
    """Fit one SRM per hemisphere on per-subject connectivity matrices.

    Each subject's matrices are collected across stories and averaged when
    there is more than one; the per-subject matrices are then passed as a
    list to ``SRM.fit``.

    Parameters
    ----------
    target_fcs : dict
        Nested dictionary indexed as ``target_fcs[story][subject][hemi]``.
    stories, subjects, hemisphere : optional
        Key selections passed to ``check_keys``; ``None`` selects all keys.
    k : int
        Number of SRM features.
    n_iter : int
        Number of SRM iterations.
    half : int
        Used only in the output file names.
    save_prefix : str, optional
        If truthy, the transforms dict and the shared-space dict are
        written with ``np.save`` under ``data/``.

    Returns
    -------
    tuple
        ``(transforms, srms)``: ``transforms[subject][hemi]`` holds that
        subject's entry of ``srm.w_``; ``srms[hemi]`` is the fitted SRM.
    """
    story_names = check_keys(target_fcs, keys=stories)

    # Gather every matrix per subject and hemisphere, across stories
    collected = {}
    for story in story_names:

        subject_ids = check_keys(target_fcs[story], keys=subjects,
                                 subkey=story)

        for sid in subject_ids:

            # Hemispheres are assumed identical across stories/subjects;
            # the last value of `hemis` is reused by the loops below
            hemis = check_keys(target_fcs[story][sid], keys=hemisphere)

            for hemi in hemis:
                per_hemi = collected.setdefault(sid, {})
                per_hemi.setdefault(hemi, []).append(
                    target_fcs[story][sid][hemi])

    # Collapse each list to one matrix (mean when there are several)
    all_subjects = list(collected.keys())
    for sid in all_subjects:
        for hemi in hemis:
            matrices = collected[sid][hemi]
            if len(matrices) > 1:
                collected[sid][hemi] = np.mean(matrices, axis=0)
            else:
                collected[sid][hemi] = matrices[0]

    # One SRM per hemisphere; keep weights, shared space and the model
    transforms, shared_space, srms = {}, {}, {}
    for hemi in hemis:

        srm = SRM(n_iter=n_iter, features=k)

        fc_list = []
        for sid in all_subjects:
            fc_list.append(collected[sid][hemi])
            transforms.setdefault(sid, {})

        start = time()
        srm.fit(fc_list)
        print(f"Finished fitting SRM after {time() - start:.1f} seconds")

        # srm.w_ is paired with subjects in the order they were passed in
        for sid, weights in zip(all_subjects, srm.w_):
            transforms[sid][hemi] = weights

        shared_space[hemi] = srm.s_
        srms[hemi] = srm

    if save_prefix:
        np.save(f'data/half-{half}_{save_prefix}_w.npy', transforms)
        np.save(f'data/half-{half}_{save_prefix}_s.npy', shared_space)

    return transforms, srms
def srm_fit(target_fcs, stories=None, subjects=None, hemisphere=None,
            k=360, n_iter=10):
    """Fit one SRM per hemisphere and return the per-subject weights.

    Each subject's matrices are collected across stories and averaged when
    there is more than one; the per-subject matrices are then passed as a
    list to ``SRM.fit``. Every subject seen in any selected story is
    included, not only the subjects of the last story processed.

    Parameters
    ----------
    target_fcs : dict
        Nested dictionary indexed as ``target_fcs[story][subject][hemi]``.
    stories, subjects, hemisphere : optional
        Key selections passed to ``check_keys``; ``None`` selects all keys.
    k : int
        Number of SRM features.
    n_iter : int
        Number of SRM iterations.

    Returns
    -------
    dict
        ``transforms[subject][hemi]`` holding that subject's entry of
        ``srm.w_``. The fitted SRM objects themselves are not returned.
    """
    # By default grab all stories
    stories = check_keys(target_fcs, keys=stories)

    # Recompile FCs accounting for repeat subjects
    subject_fcs = {}
    for story in stories:

        # By default just grab all subjects
        subject_list = check_keys(target_fcs[story], keys=subjects,
                                  subkey=story)

        for subject in subject_list:

            # For simplicity we just assume same hemis across
            # stories/subjects
            hemis = check_keys(target_fcs[story][subject], keys=hemisphere)

            for hemi in hemis:
                subject_fcs.setdefault(subject, {}).setdefault(
                    hemi, []).append(target_fcs[story][subject][hemi])

    # Use every subject accumulated across stories; relying on the last
    # story's subject_list would drop (or fail on) subjects that only
    # appear in other stories
    all_subjects = list(subject_fcs.keys())

    # Stack FCs across stories in connectivity space
    for subject in all_subjects:
        for hemi in hemis:
            # If more than one connectivity per subject, take average
            if len(subject_fcs[subject][hemi]) > 1:
                subject_fcs[subject][hemi] = np.mean(
                    subject_fcs[subject][hemi], axis=0)
            else:
                subject_fcs[subject][hemi] = subject_fcs[subject][hemi][0]

    # Convert FCs to list for SRM
    transforms = {}
    for hemi in hemis:

        # Declare SRM for this hemi
        srm = SRM(n_iter=n_iter, features=k)

        subject_stack = []
        for subject in all_subjects:
            subject_stack.append(subject_fcs[subject][hemi])
            transforms.setdefault(subject, {})

        # Train SRM and apply
        start = time()
        srm.fit(subject_stack)
        print(f"Finished fitting SRM after {time() - start:.1f} seconds")

        # srm.w_ is paired with subjects in the order they were passed in
        for subject_id, transform in zip(all_subjects, srm.w_):
            transforms[subject_id][hemi] = transform

    return transforms
# Loop through keys without replacing existing ones for story in stories: if story not in results: results[story] = {} if story_train == 'within': train_stories, test_stories = story, story elif story_train == 'across': test_stories = story train_stories = [st for st in stories if st is not test_story] elif story_train == 'all': test_stories = story train_stories = stories # By default just grab all subjects subject_list = check_keys(metadata[story]['data']) # Split models and load in data splits train_model_dict = split_models(metadata, stories=stories, subjects=None, half=1, delays=delays) test_model_dict = split_models(metadata, stories=stories, subjects=None, half=2, delays=delays) for roi in rois: if roi not in results[story]: