Пример #1
0
 def start_actors(self):
     """Launch one remote RayActor for every job currently in the input queue.

     The created actor handles are stored in ``self.actors``.
     """
     n_jobs = self.in_queue.qsize()
     printv("Starting actors for {} jobs...".format(n_jobs))
     spawned = []
     for _ in range(n_jobs):
         # Every actor receives the shared data object and the three queues.
         spawned.append(
             RayActor.remote(self.data_object, self.in_queue,
                             self.out_queue, self.status_queue))
     self.actors = spawned
Пример #2
0
 def add_kfold_indices(self, n_folds, clean=True):
     """Compute k-fold indices for the stored subjects and save them.

     The indices are derived from the index of ``self.data_dict['data']`` and
     written to ``self.data_dict['kfold_indices']``.

     Args:
         n_folds: Number of folds passed on to ``get_kfold_indices``.
         clean: When truthy, the indices are post-processed by
             ``clean_kfold_indices`` together with ``self.behav_data``.
     """
     raw_indices = get_kfold_indices(self.data_dict['data'].index, n_folds)
     final_indices = (clean_kfold_indices(raw_indices, self.behav_data)
                      if clean else raw_indices)
     self.data_dict['kfold_indices'] = final_indices
     printv("You need to (re-) upload data after this operation.")
Пример #3
0
def get_suprathr_edges_new(df_dict,
                           p_thresh_pos=None,
                           p_thresh_neg=None,
                           r_thresh_pos=None,
                           r_thresh_neg=None,
                           percentile_neg=None,
                           percentile_pos=None,
                           top_n_pos=None,
                           top_n_neg=None):
    """Build positive/negative suprathreshold edge masks for every fold.

    One pair of selection criteria is applied per fold, checked in this
    order: p-value thresholds, r thresholds, percentiles, top-n. A pair only
    counts as given when both of its values are truthy, i.e. ``None`` and
    ``0`` are both treated as "not given".

    Args:
        df_dict: Mapping ``fold -> table`` where each table has an ``'r'``
            column (and a ``'p-val'`` column when p-value thresholds are
            used). Fold keys are used as ``fold + 1`` in the log message.
        p_thresh_pos, p_thresh_neg: Maximum p-value for edges with
            positive / negative r.
        r_thresh_pos, r_thresh_neg: r thresholds; ``r_thresh_neg`` may be
            given as a positive or a negative value.
        percentile_neg, percentile_pos: Percentiles from which per-fold r
            thresholds are derived.
        top_n_pos, top_n_neg: Number of edges with the largest / smallest r
            to select.

    Returns:
        dict: ``{fold: {'pos': mask, 'neg': mask}}`` where each mask is a
        float array with one entry per row of the fold's table (1.0 for a
        selected edge, 0.0 otherwise).

    Raises:
        TypeError: If no complete pair of criteria is given.
    """
    masks_dict = {}

    for fold, pcorr_df in df_dict.items():
        r_col = pcorr_df['r']
        n_edges = len(pcorr_df)

        if p_thresh_pos and p_thresh_neg:
            pos_mask = (r_col > 0) & (pcorr_df['p-val'] <= p_thresh_pos)
            neg_mask = (r_col < 0) & (pcorr_df['p-val'] <= p_thresh_neg)
        elif r_thresh_pos and r_thresh_neg:
            pos_mask = r_col > r_thresh_pos
            # r_thresh_neg can be both given as a positive or a negative value
            neg_mask = r_col < -abs(r_thresh_neg)
        elif percentile_pos and percentile_neg:
            # Keep the derived thresholds in fold-local names. Writing them
            # back to r_thresh_pos/r_thresh_neg would make every following
            # fold take the r-threshold branch with this fold's values.
            fold_thresh_pos = np.nanpercentile(r_col, percentile_pos)
            fold_thresh_neg = np.nanpercentile(r_col[r_col < 0],
                                               100 - percentile_neg)
            pos_mask = r_col > fold_thresh_pos
            neg_mask = r_col < -abs(fold_thresh_neg)
        elif top_n_pos and top_n_neg:
            # argpartition runs on the non-NaN values only, so its result has
            # to be mapped back to positions in the full column before it can
            # index the mask.
            valid_idx = np.flatnonzero(np.asarray(r_col.notna()))
            valid_r = np.asarray(r_col)[valid_idx]
            pos_mask = np.zeros(pcorr_df.shape[0])
            neg_mask = np.zeros(pcorr_df.shape[0])
            pos_mask[valid_idx[np.argpartition(
                valid_r, -top_n_pos)[-top_n_pos:]]] = 1
            neg_mask[valid_idx[np.argpartition(
                valid_r, top_n_neg)[:top_n_neg]]] = 1
        else:
            raise TypeError(
                'Either p_thresh_{neg, pos} or r_thresh_{neg, pos} or percentile_{neg, pos} or top_n_{pos, neg} needs to be defined.'
            )

        printv(
            "Fold {}: Pos/neg suprathreshold edges (max r pos/max r neg): {}/{} ({}/{})"
            .format(fold + 1, pos_mask.sum(), neg_mask.sum(), r_col.max(),
                    r_col.min()))

        masks_dict[fold] = {}
        for tail, mask in (('pos', pos_mask), ('neg', neg_mask)):
            # Store every mask as a plain float array of 0.0/1.0.
            masks_dict[fold][tail] = np.zeros(n_edges)
            masks_dict[fold][tail][:] = mask.astype(bool)

    return masks_dict
Пример #4
0
 def get_fselection_results(self):
     """Drain the output queue and file each result under its permutation/fold.

     Each retrieved result is indexed as ``(fold, perm, df)`` and stored in
     ``self.fselection_results[perm][fold]``.
     """
     results = self.get_results(self.out_queue)
     total = len(results)
     printv("\n")
     for position, result in enumerate(results, start=1):
         fold, perm, df = result[0], result[1], result[2]
         printv("Rearranging result {} of {}".format(position, total),
                update=True)
         self.fselection_results[perm][fold] = df
Пример #5
0
def perform_cpm(all_fc_data, all_behav_data, behav, k=10, **cpm_kwargs):
    """Run a k-fold linear CPM for one behavioural measure.

    For every fold the training data are used to select features and build a
    model, which is then applied to the held-out subjects. Folds for which
    ``build_model`` returns a falsy value are skipped.

    Args:
        all_fc_data: Table of connectivity vectors; its index is the subject
            list and its columns are the edges.
        all_behav_data: Behaviour table whose index must equal the index of
            ``all_fc_data``.
        behav: Name of the behaviour column to predict.
        k: Number of folds.
        **cpm_kwargs: Passed through to ``select_features``.

    Returns:
        tuple: ``(behav_obs_pred, all_masks)`` - a DataFrame with predicted
        (pos/neg/glm) and observed behaviour per subject, and a dict with
        ``'pos'``/``'neg'`` arrays of shape ``(k, n_edges)`` holding the
        feature masks of every fold.
    """
    from hcpsuite import timer
    timer('tic', name='Linear CPM')
    indices_match = all_fc_data.index.equals(all_behav_data.index)
    assert indices_match, "Row (subject) indices of FC vcts and behavior don't match!"

    # The subject list is taken from the index of the connectivity table.
    subj_list = all_fc_data.index
    indices = create_kfold_indices(subj_list, k=k)

    # Table for observed and predicted behaviour
    observed_col = behav + " observed"
    col_list = [
        behav + " predicted (" + tail + ")" for tail in ("pos", "neg", "glm")
    ]
    col_list.append(observed_col)
    behav_obs_pred = pd.DataFrame(index=subj_list, columns=col_list)

    # One mask row per fold and tail
    n_edges = all_fc_data.shape[1]
    all_masks = {tail: np.zeros((k, n_edges)) for tail in ("pos", "neg")}

    n_folds_completed = 0
    for fold in range(k):
        printv("Doing fold {} of {} (successful folds: {})...".format(
            fold + 1, k, n_folds_completed))
        train_subs, test_subs = split_train_test(subj_list,
                                                 indices,
                                                 test_fold=fold)
        fold_data = get_train_test_data(all_fc_data,
                                        train_subs,
                                        test_subs,
                                        all_behav_data,
                                        behav=behav)
        train_vcts, train_behav, test_vcts = fold_data

        mask_dict = select_features(train_vcts, train_behav, **cpm_kwargs)
        for tail in ("pos", "neg"):
            all_masks[tail][fold, :] = mask_dict[tail]

        model_dict = build_model(train_vcts, mask_dict, train_behav)
        if model_dict:
            behav_pred = apply_model(test_vcts, mask_dict, model_dict)
            for tail, predictions in behav_pred.items():
                pred_col = behav + " predicted (" + tail + ")"
                behav_obs_pred.loc[test_subs, pred_col] = predictions
            n_folds_completed += 1
        else:
            # build_model signals invalid arrays with a falsy return value;
            # such folds are left without predictions.
            printv("  - Fold failed -> continuing with next fold...")

    print("\nCPM completed. Successful folds: {}".format(n_folds_completed))
    behav_obs_pred.loc[subj_list, observed_col] = all_behav_data[behav]
    timer('toc')
    return behav_obs_pred, all_masks
Пример #6
0
def create_fold(fold, subj_list, indices, all_fc_data, all_behav_data, behav):
    """Assemble the training and test data of a single fold.

    Args:
        fold: Fold number, used as ``test_fold`` for ``split_train_test``.
        subj_list: Subjects to split.
        indices: Fold indices handed to ``split_train_test``.
        all_fc_data: Connectivity data for all subjects.
        all_behav_data: Behavioural data for all subjects.
        behav: Behaviour name handed to ``get_train_test_data``.

    Returns:
        tuple: ``(train_vcts, train_behav, test_vcts, test_subs)``.
    """
    printv("Creating fold {}...".format(fold + 1), update=True)
    split = split_train_test(subj_list, indices, test_fold=fold)
    train_subs, test_subs = split
    fold_data = get_train_test_data(all_fc_data,
                                    train_subs,
                                    test_subs,
                                    all_behav_data,
                                    behav=behav)
    train_vcts, train_behav, test_vcts = fold_data

    return train_vcts, train_behav, test_vcts, test_subs
Пример #7
0
def do_perm(n, n_perm, train_vcts, train_behav, test_vcts, test_behav,
            **cpm_kwargs):
    """Run one permutation: shuffle training behaviour, refit and score.

    Note that both behaviour arguments are modified in place:
    ``train_behav['obs']`` is overwritten with its permutation and
    ``test_behav['glm']`` with the GLM predictions.

    Args:
        n: Zero-based number of this permutation (only used for logging).
        n_perm: Total number of permutations (only used for logging).
        train_vcts: Training connectivity vectors.
        train_behav: Training behaviour with an ``'obs'`` entry.
        test_vcts: Test connectivity vectors.
        test_behav: Test behaviour; receives a ``'glm'`` entry.
        **cpm_kwargs: Passed through to ``select_features``.

    Returns:
        The first element of ``get_r_value(test_behav, tail='glm')``.
    """
    global verbose
    printv("Doing permutation {} of {} ({} %)".format(
        n + 1, n_perm, round(((n + 1) / n_perm) * 100, 2)))
    train_behav['obs'] = np.random.permutation(train_behav['obs'])
    train_behav = train_behav['obs']

    # Switch the module-level verbose flag off during feature selection.
    # try/finally guarantees the flag is restored even if select_features
    # raises; otherwise it would stay False for the rest of the run.
    previous_verbose = verbose
    verbose = False
    try:
        mask_dict = select_features(train_vcts, train_behav, **cpm_kwargs)
    finally:
        verbose = previous_verbose

    model_dict = build_model(train_vcts, mask_dict, train_behav)
    behav_pred = apply_model(test_vcts, mask_dict, model_dict)
    # We're only interested in GLM at this point
    test_behav['glm'] = behav_pred['glm']
    r = get_r_value(test_behav, tail='glm')[0]

    return r
Пример #8
0
 def get_results(self, queue, n=100):
     """Drain ``queue`` in batches and return all retrieved items.

     Shared by the get_{prediction,fselection}_results methods.

     Args:
         queue: Queue to read from; must offer ``empty``, ``qsize`` and
             ``get_nowait_batch``.
         n: Maximum number of items fetched per batch.

     Returns:
         list: All items retrieved until the queue reported empty.
     """
     collected = []
     largest_seen = 0
     batch_size = n
     while not queue.empty():
         pending = queue.qsize()
         largest_seen = max(largest_seen, pending)
         # Never request more than is waiting; small batches also give a
         # progress display of sorts.
         batch_size = min(batch_size, pending)
         printv("Retrieving results: {} of {}".format(
             len(collected) + batch_size, largest_seen),
                update=True)
         collected.extend(queue.get_nowait_batch(batch_size))
     return collected
Пример #9
0
    def status(self, verbose=True):
        """Update and print the actor status overview and the job counters.

        All pending entries of the status queue are read as
        ``(pid, node, msg)`` and merged into ``self.status_dict``. Actors
        with a truthy message are then listed, followed by the sizes of the
        output and input queues.

        Args:
            verbose: Not used by this method.

        Returns:
            tuple: ``(out_size, in_size)`` - sizes of the output and input
            queues.
        """
        n_items = self.status_queue.size()
        status_entries = self.status_queue.get_nowait_batch(n_items)
        printv("Retrieving {} items from status queue...".format(n_items))
        for entry in status_entries:
            pid, node, msg = entry[0], entry[1], entry[2]
            self.status_dict[pid] = {"msg": msg, "node": node}

        # Only print alive actors (-> msg != None)
        alive = [(pid, info) for pid, info in self.status_dict.items()
                 if info['msg']]
        for rank, (pid, info) in enumerate(alive, start=1):
            print("Actor {} [{}@{}]: {}".format(rank, pid, info['node'],
                                                info['msg']))
        print("\n")

        out_size = self.out_queue.qsize()
        in_size = self.in_queue.qsize()
        print("Jobs done: {}".format(out_size))
        print("Jobs remaining in queue: {}".format(in_size))

        return out_size, in_size
Пример #10
0
def convert_matrices_to_dataframe(array, subj_ids):
    """Flatten per-subject matrices into one DataFrame row per subject.

    Args:
        array: Array indexed as ``array[subject, :, :]``; its first
            dimension must match ``len(subj_ids)``.
        subj_ids: Subject identifiers, in the same order as ``array``.

    Returns:
        pandas.DataFrame: One row per subject id holding the matrix entries
        above the main diagonal.
    """
    n_subjects = len(subj_ids)
    assert array.shape[
        0] == n_subjects, "Number of subject IDs is not equal to number of subjects in neuroimage file"

    flattened = {}
    for position, subj_id in enumerate(subj_ids):
        printv("Flattening matrix of subject {} ({} of {}...)".format(
            subj_id, position + 1, len(subj_ids)),
               update=True)
        subj_matrix = array[position, :, :]
        # Only use upper triangle of symmetric matrix (diagonal excluded)
        upper = np.triu_indices_from(subj_matrix, k=1)
        flattened[subj_id] = subj_matrix[upper]

    printv("\nCreating DataFrame from matrices...")
    return pd.DataFrame.from_dict(flattened, orient='index')
Пример #11
0
def do_fold(fold, train_vcts, train_behav, test_vcts, test_subs, subj_list,
            all_behav_data, behav, **cpm_kwargs):
    """Process one CPM fold and record its results in module-level globals.

    Writes the fold's feature masks to ``all_masks``, the predictions and
    observed behaviour to ``behav_obs_pred`` and increments
    ``n_folds_completed``.

    Args:
        fold: Zero-based fold number; also the row written in ``all_masks``.
        train_vcts, train_behav: Training data of the fold.
        test_vcts, test_subs: Test vectors and the matching subjects.
        subj_list: All subjects, used to fill the observed column.
        all_behav_data: Behaviour table for all subjects.
        behav: Name of the behaviour column.
        **cpm_kwargs: Passed through to ``select_features``.

    Returns:
        ``False`` when ``build_model`` returns a falsy value (fold skipped),
        otherwise ``None``.
    """
    global all_masks
    global behav_obs_pred
    global n_folds_completed
    printv("Doing fold {}...".format(fold + 1))

    mask_dict = select_features(train_vcts, train_behav, **cpm_kwargs)
    has_masks = isinstance(mask_dict, dict)
    for tail in ("pos", "neg"):
        # A non-dict result is recorded as a row of NaN.
        all_masks[tail][fold, :] = mask_dict[tail] if has_masks else np.nan

    model_dict = build_model(train_vcts, mask_dict, train_behav)
    if not model_dict:
        # build_model returns False instead of a dict if an array is not valid
        printv("  - Fold failed -> continuing with next fold...")
        return False

    behav_pred = apply_model(test_vcts, mask_dict, model_dict)
    for tail, predictions in behav_pred.items():
        pred_col = behav + " predicted (" + tail + ")"
        behav_obs_pred.loc[test_subs, pred_col] = predictions
    n_folds_completed += 1

    behav_obs_pred.loc[subj_list, behav + " observed"] = all_behav_data[behav]
Пример #12
0
def select_features(train_vcts,
                    train_behav,
                    r_thresh=0.2,
                    corr_type='pearson'):
    """Run the CPM feature selection step.

    Correlates each edge (column of ``train_vcts``) with behaviour and
    returns one mask per tail marking the edges whose correlation lies above
    ``r_thresh`` (positive tail) or below ``-r_thresh`` (negative tail).

    Args:
        train_vcts: Training connectivity vectors, one column per edge.
        train_behav: Training behaviour; its index must equal the index of
            ``train_vcts``.
        r_thresh: Correlation threshold.
        corr_type: ``'pearson'`` or ``'spearman'``.

    Returns:
        dict: ``{'pos': mask, 'neg': mask}`` of boolean masks over the edges.

    Raises:
        ValueError: If ``corr_type`` is neither ``'pearson'`` nor
            ``'spearman'``.
    """
    assert train_vcts.index.equals(
        train_behav.index), "Row indices of FC vcts and behavior don't match!"

    # Correlate all edges with behav vector
    if corr_type == 'pearson':
        cov = np.dot(train_behav.T - train_behav.mean(), train_vcts -
                     train_vcts.mean(axis=0)) / (train_behav.shape[0] - 1)
        corr = cov / np.sqrt(
            np.var(train_behav, ddof=1) * np.var(train_vcts, axis=0, ddof=1))
    elif corr_type == 'spearman':
        # The coefficients must end up in an array: a plain list supports
        # neither the element-wise threshold comparison nor .max()/.min().
        corr = np.asarray([
            sp.stats.spearmanr(train_vcts.loc[:, edge], train_behav)[0]
            for edge in train_vcts.columns
        ])
    else:
        raise ValueError(
            "corr_type must be 'pearson' or 'spearman', got {!r}".format(
                corr_type))

    # Define positive and negative masks
    mask_dict = {}
    mask_dict["pos"] = corr > r_thresh
    mask_dict["neg"] = corr < -r_thresh

    printv(
        "  - Found ({}/{}) edges positively/negatively correlated (threshold: {}) with behavior in the training set"
        .format(mask_dict["pos"].sum(), mask_dict["neg"].sum(),
                r_thresh))  # for debugging
    printv("  - Max r pos: {}, max r neg: {}".format(corr.max(), corr.min()))

    return mask_dict
Пример #13
0
 def start_workers(self, n_workers):
     """Launch ``n_workers`` remote RayWorker instances.

     The created worker handles are stored in ``self.workers``.
     """
     printv("Starting {} workers".format(n_workers))
     spawned = []
     for _ in range(n_workers):
         # Every worker receives the shared data object and the three queues.
         spawned.append(
             RayWorker.remote(self.data_object, self.in_queue,
                              self.out_queue, self.status_queue))
     self.workers = spawned
Пример #14
0
def get_suprathr_edges(df_dict,
                       perm=-1,
                       p_thresh_pos=None,
                       p_thresh_neg=None,
                       r_thresh_pos=None,
                       r_thresh_neg=None,
                       percentile_neg=None,
                       percentile_pos=None,
                       top_n_pos=None,
                       top_n_neg=None):
    """Build positive/negative suprathreshold edge masks for one permutation.

    One pair of selection criteria is applied per fold, checked in this
    order: p-value thresholds, r thresholds, percentiles, top-n. A pair only
    counts as given when both of its values are truthy, i.e. ``None`` and
    ``0`` are both treated as "not given".

    Args:
        df_dict: Nested mapping ``perm -> fold -> table`` where each table
            has an ``'r'`` column (and a ``'p-val'`` column when p-value
            thresholds are used). Fold keys are used directly as row indices
            of the returned arrays and as ``fold + 1`` in the log message.
        perm: Key of the permutation to evaluate.
        p_thresh_pos, p_thresh_neg: Maximum p-value for edges with
            positive / negative r.
        r_thresh_pos, r_thresh_neg: r thresholds; ``r_thresh_neg`` may be
            given as a positive or a negative value.
        percentile_neg, percentile_pos: Percentiles from which per-fold r
            thresholds are derived.
        top_n_pos, top_n_neg: Number of edges with the largest / smallest r
            to select.

    Returns:
        dict: ``{'pos': array, 'neg': array}``, each of shape
        ``(n_folds, n_edges)`` with 1.0 for a selected edge and 0.0
        otherwise. ``n_edges`` is taken from the first fold's table.

    Raises:
        TypeError: If no complete pair of criteria is given.
    """
    fold_dfs = df_dict[perm]
    folds_list = list(fold_dfs.keys())
    n_folds = len(folds_list)
    # With no folds there is nothing to measure; return empty masks instead
    # of failing on folds_list[0].
    n_edges = len(fold_dfs[folds_list[0]]) if folds_list else 0
    all_masks = {}
    all_masks['pos'] = np.zeros((n_folds, n_edges))
    all_masks['neg'] = np.zeros((n_folds, n_edges))

    for fold in folds_list:
        pcorr_df = fold_dfs[fold]
        r_col = pcorr_df['r']

        if p_thresh_pos and p_thresh_neg:
            pos_mask = (r_col > 0) & (pcorr_df['p-val'] <= p_thresh_pos)
            neg_mask = (r_col < 0) & (pcorr_df['p-val'] <= p_thresh_neg)
        elif r_thresh_pos and r_thresh_neg:
            pos_mask = r_col > r_thresh_pos
            # r_thresh_neg can be both given as a positive or a negative value
            neg_mask = r_col < -abs(r_thresh_neg)
        elif percentile_pos and percentile_neg:
            # Keep the derived thresholds in fold-local names. Writing them
            # back to r_thresh_pos/r_thresh_neg would make every following
            # fold take the r-threshold branch with this fold's values.
            fold_thresh_pos = np.nanpercentile(r_col, percentile_pos)
            fold_thresh_neg = np.nanpercentile(r_col[r_col < 0],
                                               100 - percentile_neg)
            pos_mask = r_col > fold_thresh_pos
            neg_mask = r_col < -abs(fold_thresh_neg)
        elif top_n_pos and top_n_neg:
            # argpartition runs on the non-NaN values only, so its result has
            # to be mapped back to positions in the full column before it can
            # index the mask.
            valid_idx = np.flatnonzero(np.asarray(r_col.notna()))
            valid_r = np.asarray(r_col)[valid_idx]
            pos_mask = np.zeros(pcorr_df.shape[0])
            neg_mask = np.zeros(pcorr_df.shape[0])
            pos_mask[valid_idx[np.argpartition(
                valid_r, -top_n_pos)[-top_n_pos:]]] = 1
            neg_mask[valid_idx[np.argpartition(
                valid_r, top_n_neg)[:top_n_neg]]] = 1
        else:
            raise TypeError(
                'Either p_thresh_{neg, pos} or r_thresh_{neg, pos} or percentile_{neg, pos} or top_n_{pos, neg} needs to be defined.'
            )

        printv(
            "Fold {}: Pos/neg suprathreshold edges (max r pos/max r neg): {}/{} ({}/{})"
            .format(fold + 1, pos_mask.sum(), neg_mask.sum(), r_col.max(),
                    r_col.min()))
        all_masks['pos'][fold, :] = pos_mask.astype(bool)
        all_masks['neg'][fold, :] = neg_mask.astype(bool)

    return all_masks

    def start_autosave(path, ray_handler, save_size=1000):
        """Create a remote AutoSaveActor placed on the current node.

        Args:
            path: Passed to the actor as its first argument.
            ray_handler: Passed to the actor as its second argument.
            save_size: Passed to the actor as its third argument.

        Returns:
            The handle returned by ``.remote(...)``.
        """
        from ray.util.ml_utils.node import force_on_current_node
        # Bind the wrapped class to a new name. Assigning to `AutoSaveActor`
        # here would make that name local to this function, so reading it on
        # the right-hand side would raise UnboundLocalError.
        local_actor_cls = force_on_current_node(AutoSaveActor)
        autosave_actor = local_actor_cls.remote(path, ray_handler, save_size)

        return autosave_actor
Пример #15
0
def plot_consistent_edges_loo(r_mat,
                              thresh=0.13,
                              tail='pos',
                              consistency=0.8,
                              coords=None,
                              save=False,
                              fname='consistent_edges.svg',
                              **plot_connectome_kwargs):
    """Plot edges that are consistently suprathreshold in a leave-one-out CPM.

    An edge is kept when it exceeds the threshold in more than
    ``consistency`` (a fraction) of the subjects. The input array is not
    modified.

    Args:
        r_mat: Array of r matrices with the subject axis last, i.e.
            ``(nodes, nodes, subjects)``. NaNs are treated as 0.
        thresh: r threshold; the negative tail uses ``-thresh``.
        tail: ``'pos'`` (r > thresh, marked 1), ``'neg'`` (r < -thresh,
            marked -1), ``'combined'`` (|r| > thresh, marked 1); any other
            value marks positive edges 1 and negative edges -1.
        consistency: Fraction of subjects in which an edge has to be
            suprathreshold.
        coords: Node coordinates passed to ``plotting.plot_connectome``.
        save: When truthy, the figure is saved to ``fname``.
        fname: Output file name used when ``save`` is truthy.
        **plot_connectome_kwargs: Passed through to
            ``plotting.plot_connectome``.
    """
    # moveaxis only returns a view; copy so that zeroing the NaNs below does
    # not overwrite the caller's array.
    r_mat = np.moveaxis(r_mat, -1, 0).copy()
    # We need to zero NaNs, otherwise symmetrizing won't work
    r_mat[np.isnan(r_mat)] = 0
    if not is_symmetric(r_mat[0]):
        r_mat = symmetrize_matrices(r_mat, mirror_lower=True)

    n_subjects, n_rows, n_cols = r_mat.shape
    r_mat_flat = r_mat.reshape(n_subjects, n_rows * n_cols)
    min_count = n_subjects * consistency
    edges_mask = np.zeros(r_mat_flat.shape[1])

    # Summing the boolean comparison over the subject axis counts, per edge,
    # the number of subjects in which it is suprathreshold.
    if tail == 'pos':
        edges_count = (r_mat_flat > thresh).sum(axis=0)
        edges_mask[edges_count > min_count] = 1
    elif tail == 'neg':
        edges_count = (r_mat_flat < -thresh).sum(axis=0)
        edges_mask[edges_count > min_count] = -1
    elif tail == 'combined':
        edges_count = (abs(r_mat_flat) > thresh).sum(axis=0)
        edges_mask[edges_count > min_count] = 1
    else:  # aka tail='both'
        edges_count_pos = (r_mat_flat > thresh).sum(axis=0)
        edges_count_neg = (r_mat_flat < -thresh).sum(axis=0)
        edges_mask[edges_count_pos > min_count] = 1
        edges_mask[edges_count_neg > min_count] = -1

    nodes_mask = edges_mask.reshape((n_rows, n_cols))
    # Each edge appears twice in the symmetric matrix, hence the division.
    printv("There are {} suprathreshold (> {}) edges in {} % of the subjects".
           format(
               len(edges_mask[edges_mask != 0]) / 2, thresh,
               consistency * 100))

    # Degree of every node = number of retained edges attached to it. The
    # node count follows the matrix size instead of a fixed 513.
    degrees = abs(nodes_mask).sum(axis=1)

    plotting.plot_connectome(nodes_mask,
                             node_coords=coords,
                             display_mode='lzry',
                             node_size=[degree * 20 for degree in degrees],
                             edge_kwargs={"linewidth": 2},
                             **plot_connectome_kwargs)
    if save:
        plt.savefig(fname)