def plot_component_comparisons(images, labels, score_mat, sign_mat, force=False, out_dir=None):
    """Use score_mat to match up components between the two images.

    If force is True, one-to-one matching is forced. sign_mat is used to
    flip signs when comparing the two images.
    """
    # Sanity checks on the inputs
    assert len(images) == 2
    assert len(labels) == 2
    assert images[0].shape == images[1].shape
    n_components = images[0].shape[3]  # values @ 0 and 1 are the same
    assert score_mat.shape == sign_mat.shape
    assert len(score_mat[0]) == n_components

    # Get indices for matching components
    match, unmatch = get_match_idx_pair(score_mat, sign_mat, force=force)
    idx_pair = match["idx"]
    sign_pair = match["sign"]
    if not force and unmatch["idx"] is not None:
        idx_pair = np.hstack((idx_pair, unmatch["idx"]))
        sign_pair = np.hstack((sign_pair, unmatch["sign"]))
    n_comp = len(idx_pair[0])  # number of comparisons

    # Calculate a vmax that is optimal across all the plots.
    # Get the nonzero part of the images for proper thresholding of
    # R- or L-only components.
    nonzero_imgs = [img.get_data()[np.nonzero(img.get_data())]
                    for img in images]
    dat = np.append(nonzero_imgs[0], nonzero_imgs[1])
    vmax = stats.scoreatpercentile(np.abs(dat), 99.99)

    print("Plotting results.")
    for i in range(n_comp):
        c1i, c2i = idx_pair[0][i], idx_pair[1][i]
        cis = [c1i, c2i]
        prefix = "unmatched-" if i >= n_components else ""
        num = i - n_components if i >= n_components else i
        png_name = '%s%s_%s_%s.png' % (prefix, labels[0], labels[1], num)
        print("plotting %s" % png_name)

        comp_imgs = [index_img(img, ci) for img, ci in zip(images, cis)]

        # Flip the sign if sign_mat for the corresponding comparison is -1
        signs = [sign_pair[0][i], sign_pair[1][i]]
        comp_imgs = [math_img("%d*img" % (sign), img=img)
                     for sign, img in zip(signs, comp_imgs)]

        if ('R' in labels and 'L' in labels):
            # Combine the left and right images and show just one;
            # terms are not combined here.
            comp = math_img("img1+img2", img1=comp_imgs[0], img2=comp_imgs[1])
            titles = [_title_from_terms(
                terms=comp_imgs[labels.index(hemi)].terms,
                ic_idx=cis[labels.index(hemi)], label=hemi,
                sign=signs[labels.index(hemi)]) for hemi in labels]
            fh = plt.figure(figsize=(14, 8))
            plot_stat_map(comp, axes=fh.gca(), title="\n".join(titles),
                          black_bg=True, symmetric_cbar=True, vmax=vmax)

        else:
            # Show two images, one above the other.
            fh = plt.figure(figsize=(14, 12))
            for ii in [0, 1]:  # Subplot per image
                ax = fh.add_subplot(2, 1, ii + 1)
                comp = comp_imgs[ii]

                title = _title_from_terms(terms=images[ii].terms,
                                          ic_idx=cis[ii], label=labels[ii],
                                          sign=signs[ii])

                if ii == 0:
                    display = plot_stat_map(comp, axes=ax, title=title,  # noqa
                                            black_bg=True, symmetric_cbar=True,
                                            vmax=vmax)
                else:
                    # Use the same cut coords as the first subplot
                    cut_coords = display.cut_coords  # noqa
                    display = plot_stat_map(comp, axes=ax, title=title,
                                            black_bg=True, symmetric_cbar=True,
                                            vmax=vmax, display_mode='ortho',
                                            cut_coords=cut_coords)

        # Save images instead of displaying them
        if out_dir is not None:
            save_and_close(out_path=op.join(out_dir, png_name), fh=fh)
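# The function below is a minimal, hypothetical sketch of the index_img / math_img
# pattern used in plot_component_comparisons (pick a component, flip its sign,
# combine two maps). It is illustration only and is not called anywhere in this
# module; the synthetic 4D image and shapes are assumptions.
def _demo_component_signflip():
    import numpy as np
    import nibabel as nib
    from nilearn.image import index_img, math_img

    rng = np.random.RandomState(42)
    img4d = nib.Nifti1Image(rng.normal(size=(4, 4, 4, 3)), affine=np.eye(4))

    comp0 = index_img(img4d, 0)               # pick a single component map
    flipped = math_img("-1*img", img=comp0)   # flip its sign, as done when sign_mat == -1
    combined = math_img("img1+img2",          # combine two maps into one image
                        img1=flipped, img2=index_img(img4d, 1))
    return combined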
def load_or_generate_summary(images, term_scores, n_components, scoring, dataset,
                             force=False, sparsityThreshold=0.000005,
                             memory=Memory(cachedir='nilearn_cache')):
    """For a given n_components, load the summary csvs if they already exist,
    or run main.py to compute and save the summary data needed for plotting.

    Returns (wb_summary, R_sparsity, L_sparsity), each of which is a DataFrame.
    """
    # Directory in which to find or save the summary csvs
    out_dir = op.join('ica_imgs', dataset, 'analyses', str(n_components))
    summary_csvs = ["wb_summary.csv", "R_sparsity.csv", "L_sparsity.csv"]

    # If the summary data are already saved as csv files, simply load them
    if not force and all([op.exists(op.join(out_dir, csv)) for csv in summary_csvs]):
        print("Loading summary data from %s" % out_dir)
        (wb_summary, R_sparsity, L_sparsity) = (pd.read_csv(op.join(out_dir, csv))
                                                for csv in summary_csvs)

    # Otherwise run main.py and save the results as csv files
    else:
        # Initialize the summary DFs
        (wb_summary, R_sparsity, L_sparsity) = (pd.DataFrame(
            {"n_comp": [n_components] * n_components}) for i in range(3))
        if not op.exists(out_dir):
            os.makedirs(out_dir)

        # Use wb matching in the main analysis to get component images and
        # matching scores
        match_method = 'wb'
        img_d, score_mats_d, sign_mats_d = do_main_analysis(
            dataset=dataset, images=images, term_scores=term_scores,
            key=match_method, force=force, plot=False,
            n_components=n_components, scoring=scoring)

        # 1) Get the sparsity of each hemisphere for the "wb", "R" and "L" imgs
        hemis = ("R", "L")
        sparsitySigns = ("pos", "neg", "abs")
        # Dict of DFs and labels used to compute and store the sparsity results
        label_dict = {"wb": (wb_summary, hemis),
                      "R": (R_sparsity, ["R"]),
                      "L": (L_sparsity, ["L"])}
        for key in label_dict:
            (df, labels) = label_dict[key]
            sparsityResults = {label: getHemiSparsity(img_d[key], label,
                                                      threshold=sparsityThreshold,
                                                      memory=memory)
                               for label in labels}  # {label: (pos_arr, neg_arr, abs_arr)}
            for i, sign in enumerate(sparsitySigns):
                for label in labels:
                    df["%s_%s" % (sign, label)] = sparsityResults[label][i]
                # For wb only, also compute the total sparsity and the HPI
                if key == "wb":
                    df["%sTotal" % sign] = df["%s_R" % sign] + df["%s_L" % sign]
                    df["%sHPI" % sign] = ((df["%s_R" % sign] - df["%s_L" % sign]) /
                                          df["%sTotal" % sign].astype(float))

        # Save the R/L sparsity DFs
        R_sparsity.to_csv(op.join(out_dir, "R_sparsity.csv"))
        L_sparsity.to_csv(op.join(out_dir, "L_sparsity.csv"))

        # 2) Get the SAS of the wb component images as well as the matched RL
        # images by passing 2 x wb or RL images and hemi labels to
        # compare_components (making sure not to flip signs when comparing R and L)
        name_img_pairs = [("wb_SAS", img_d["wb"]),
                          ("matchedRL_SAS", img_d["RL"])]
        for (name, img) in name_img_pairs:
            sas_imgs = [img] * 2
            score_mat, sign_mat = compare_components(sas_imgs, hemis, scoring,
                                                     flip=False)
            # We only care about the diagonal of score_mat
            wb_summary[name] = score_mat.diagonal()

        # 3) Finally, store the indices of the matched R, L, and RL components,
        # and the respective match scores against wb
        comparisons = [('wb', 'R'), ('wb', 'L'), ('wb', 'RL')]
        for comparison in comparisons:
            score_mat, sign_mat = score_mats_d[comparison], sign_mats_d[comparison]
            matched, unmatched = get_match_idx_pair(score_mat, sign_mat)
            # Component indices of the matched R, L, RL components are in
            # matched["idx"][1]; multiply them by matched["sign"][1], which
            # stores the sign-flipping info.
            matched_indices = matched["idx"][1] * matched["sign"][1]
            wb_summary["matched%s" % comparison[1]] = matched_indices
            matched_scores = score_mat[matched["idx"][0], matched["idx"][1]]
            wb_summary["match%s_score" % comparison[1]] = matched_scores

        # Save wb_summary
        wb_summary.to_csv(op.join(out_dir, "wb_summary.csv"))

    return (wb_summary, R_sparsity, L_sparsity)
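# The function below is a hypothetical sketch of the sparsity / HPI bookkeeping
# done in load_or_generate_summary, using made-up voxel counts. It is not called
# by the pipeline; column names follow the "<sign>_<hemi>" convention used above.
def _demo_hpi_columns():
    import pandas as pd

    df = pd.DataFrame({"pos_R": [120, 80], "pos_L": [100, 160]})
    df["posTotal"] = df["pos_R"] + df["pos_L"]
    df["posHPI"] = ((df["pos_R"] - df["pos_L"]) /
                    df["posTotal"].astype(float))
    # posHPI > 0 means the component loads more heavily on the right hemisphere.
    return df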
def do_main_analysis(dataset, images, term_scores, key="wb", n_components=20,
                     plot=True, max_images=np.inf, scoring='l1norm',
                     query_server=True, force=False, nii_dir=None,
                     plot_dir=None, random_state=42, hemis=('wb', 'R', 'L')):
    # Output directories
    nii_dir = nii_dir or op.join('ica_nii', dataset, str(n_components))
    plot_dir = plot_dir or op.join('ica_imgs', dataset,
                                   '%s-%dics' % (scoring, n_components),
                                   '%s-matching' % key)

    # 1) Components are generated for R-only, L-only, and whole-brain images.
    imgs = {}
    # Load or generate components
    kwargs = dict(images=[im['absolute_path'] for im in images],
                  n_components=n_components, term_scores=term_scores,
                  out_dir=nii_dir, plot_dir=plot_dir)
    for hemi in hemis:
        print("Running analyses on %s" % hemi)
        imgs[hemi] = load_or_generate_components(hemi=hemi, force=force,
                                                 random_state=random_state,
                                                 **kwargs)

    # 2) Compare components in order to get the concatenated RL image.
    #    "wb": R- and L-components are compared to wb-components, then matched
    #    "rl": direct R- and L-comparison, using R as the reference
    #    "lr": direct R- and L-comparison, using L as the reference
    if key == "wb":
        comparisons = [('wb', 'R'), ('wb', 'L')]
    elif key == "rl":
        comparisons = [('R', 'L')]
    elif key == "lr":
        comparisons = [('L', 'R')]

    score_mats, sign_mats = {}, {}
    RL_arr = {}
    for comp in comparisons:
        img_pair = [imgs[comp[0]], imgs[comp[1]]]

        # Compare components and plot the similarity matrix.
        # The sign_mat contains the signs that gave the best score for the comparison.
        score_mat, sign_mat = compare_components_and_plot(images=img_pair, labels=comp,
                                                          scoring=scoring,
                                                          out_dir=plot_dir)
        # Store score_mat and sign_mat
        score_mats[comp] = score_mat
        sign_mats[comp] = sign_mat

        # Get indices for matching up components, for both forced and unforced
        # one-to-one matching
        for force_match in [True, False]:
            force_status = 'forced' if force_match else 'unforced'
            plot_sub_dir = op.join(plot_dir, '%s-match' % force_status)
            match, unmatch = get_match_idx_pair(score_mat, sign_mat, force=force_match)

            # Store R and L indices/signs to match up R and L
            for i, hem in enumerate(comp):
                if hem in ['R', 'L']:
                    RL_arr[(force_status, hem, "idx")] = match["idx"][i]
                    RL_arr[(force_status, hem, "sign")] = match["sign"][i]

            # If plot=True, plot matched (and unmatched, if unforced matching) components
            if plot:
                plot_component_comparisons(images=img_pair, labels=comp,
                                           score_mat=score_mat, sign_mat=sign_mat,
                                           force=force_match, out_dir=plot_sub_dir)

    # 3) Now match up R and L (forced vs unforced match)
    for force_match in [True, False]:
        force_status = 'forced' if force_match else 'unforced'
        plot_sub_dir = op.join(plot_dir, '%s-match' % force_status)
        rl_idx_pair = (RL_arr[(force_status, "R", "idx")],
                       RL_arr[(force_status, "L", "idx")])
        rl_sign_pair = (RL_arr[(force_status, "R", "sign")],
                        RL_arr[(force_status, "L", "sign")])
        imgs['RL-%s' % force_status] = concat_RL(R_img=imgs['R'],
                                                 L_img=imgs['L'],
                                                 rl_idx_pair=rl_idx_pair,
                                                 rl_sign_pair=rl_sign_pair)

        # 4) Compare the concatenated image to bilateral components (i.e. wb vs RL).
        # Note that for wb-matching, diagonal components will be matched by definition.
        comp = ('wb', 'RL-%s' % force_status)
        img_pair = [imgs[comp[0]], imgs[comp[1]]]
        score_mat, sign_mat = compare_components_and_plot(images=img_pair, labels=comp,
                                                          scoring=scoring,
                                                          out_dir=plot_sub_dir)
        # Store score_mat and sign_mat
        score_mats[comp] = score_mat
        sign_mats[comp] = sign_mat

        # If plot=True, plot matched (and unmatched, if unforced matching) components
        if plot:
            plot_component_comparisons(images=img_pair, labels=comp,
                                       score_mat=score_mat, sign_mat=sign_mat,
                                       force=force_match, out_dir=plot_sub_dir)

        # Show term comparisons between the matched wb, R and L components
        match, unmatch = get_match_idx_pair(score_mat, sign_mat, force=force_match)
        terms = [imgs[hemi].terms for hemi in hemis]

        # Component index list for wb, R and L
        wb_idx_arr = match["idx"][0]
        r_idx_arr, l_idx_arr = [arr[match["idx"][1]] for arr in rl_idx_pair]
        ic_idx_list = [wb_idx_arr, r_idx_arr, l_idx_arr]

        # Sign-flipping list for wb, R and L
        wb_sign_arr = match["sign"][0]
        r_sign_arr, l_sign_arr = [match["sign"][1] * arr[match["idx"][1]]
                                  for arr in rl_sign_pair]
        sign_list = [wb_sign_arr, r_sign_arr, l_sign_arr]

        plot_term_comparisons(terms, labels=hemis, ic_idx_list=ic_idx_list,
                              sign_list=sign_list, color_list=['g', 'r', 'b'],
                              top_n=5, bottom_n=5, standardize=True,
                              out_dir=plot_sub_dir)

    return imgs, score_mats, sign_mats
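# The function below is a hypothetical NumPy sketch of the index/sign chaining at
# the end of do_main_analysis: the wb->RL match indices are used to look up the
# original R (or L) component indices and to compose the two sign flips. It is not
# called by the pipeline, and all arrays are made-up examples.
def _demo_index_sign_chaining():
    import numpy as np

    rl_to_r_idx = np.array([2, 0, 1])     # RL column i was built from R component rl_to_r_idx[i]
    rl_to_r_sign = np.array([1, -1, 1])   # sign applied to that R component when building RL
    wb_to_rl_idx = np.array([1, 2, 0])    # wb component i matched RL column wb_to_rl_idx[i]
    wb_to_rl_sign = np.array([-1, 1, 1])  # sign of that wb<->RL match

    r_idx_arr = rl_to_r_idx[wb_to_rl_idx]                    # R component behind each match
    r_sign_arr = wb_to_rl_sign * rl_to_r_sign[wb_to_rl_idx]  # compose the two sign flips
    return r_idx_arr, r_sign_arr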
def main_ic_loop(components, scoring, dataset, query_server=True, force=False,
                 memory=Memory(cachedir='nilearn_cache'), **kwargs):
    # $FIX Test with just 'wb' and 'rl' matching until 'lr' matching is fixed
    # match_methods = ['wb', 'rl', 'lr']
    match_methods = ['wb', 'rl']
    out_dir = op.join('ica_imgs', dataset)
    mean_scores, unmatched = [], []

    # Get the data once.
    images, term_scores = get_dataset(dataset, query_server=query_server)

    for match_method in match_methods:
        print("Plotting results for %s matching method" % match_method)
        mean_score_d, num_unmatched_d = {}, {}
        for c in components:
            print("Running analysis with %d components" % c)
            # The main analysis is run for each component count and match method;
            # plotting of component comparisons is done only if force=True
            img_d, score_mats_d, sign_mats_d = do_main_analysis(
                dataset=dataset, images=images, term_scores=term_scores,
                key=match_method, force=force, plot=force,
                n_components=c, scoring=scoring, **kwargs)

            # Get the mean dissimilarity score and the number of unmatched
            # components for each comparison in score_mats_d
            for comp in score_mats_d:
                score_mat, sign_mat = score_mats_d[comp], sign_mats_d[comp]

                # For ("wb", "RL-forced") and ("wb", "RL-unforced")
                if "forced" in comp[1]:
                    if "-forced" in comp[1]:
                        match, unmatch = get_match_idx_pair(score_mat, sign_mat,
                                                            force=True)
                    elif "-unforced" in comp[1]:
                        match, unmatch = get_match_idx_pair(score_mat, sign_mat,
                                                            force=False)
                        n_unmatched = unmatch["idx"].shape[1] if unmatch["idx"] is not None else 0
                        um_label = "unmatched RL"
                    mean_score = score_mat[match["idx"][0], match["idx"][1]].mean()
                    score_label = "%s" % (" vs ".join(comp))
                    # Store values in the respective dict
                    if c == components[0]:
                        mean_score_d[score_label] = [mean_score]
                        if "-unforced" in comp[1]:
                            num_unmatched_d[um_label] = [n_unmatched]
                    else:
                        mean_score_d[score_label].append(mean_score)
                        if "-unforced" in comp[1]:
                            num_unmatched_d[um_label].append(n_unmatched)

                # For ("wb", "R"), ("wb", "L") --wb matching, or ("R", "L") --rl matching
                else:
                    for force_match in [True, False]:
                        match, unmatch = get_match_idx_pair(score_mat, sign_mat,
                                                            force=force_match)
                        mean_score = score_mat[match["idx"][0], match["idx"][1]].mean()
                        if force_match:
                            score_label = "%s%s" % (" vs ".join(comp), "-forced")
                            n_unmatched = None
                        else:
                            score_label = "%s%s" % (" vs ".join(comp), "-unforced")
                            n_unmatched = unmatch["idx"].shape[1] if unmatch["idx"] is not None else 0
                            um_label = "unmatched %s" % comp[1]
                        # Store values in the respective dict
                        if c == components[0]:
                            mean_score_d[score_label] = [mean_score]
                            if not force_match:
                                num_unmatched_d[um_label] = [n_unmatched]
                        else:
                            mean_score_d[score_label].append(mean_score)
                            if not force_match:
                                num_unmatched_d[um_label].append(n_unmatched)

        # Store the values as DFs
        ms_df = pd.DataFrame(mean_score_d, index=components)
        um_df = pd.DataFrame(num_unmatched_d, index=components)
        mean_scores.append(ms_df)
        unmatched.append(um_df)
        # Save the combined df
        combined = pd.concat([ms_df, um_df], axis=1)
        out = op.join(out_dir, '%s-matching_simscores.csv' % match_method)
        combined.to_csv(out)

    # We have all the scores for the matching methods; now plot.
    fh, axes = plt.subplots(1, len(match_methods), sharex=True, sharey=True,
                            figsize=(18, 6))
    fh.suptitle("Average dissimilarity scores for the best-match pairs", fontsize=16)
    labels = ["wb vs R-unforced", "wb vs L-unforced", "R vs L-unforced",
              "wb vs RL-unforced", "wb vs R-forced", "wb vs L-forced",
              "R vs L-forced", "wb vs RL-forced",
              "unmatched R", "unmatched L", "unmatched RL"]
    styles = ["r-", "b-", "m-", "g-",
              "r:", "b:", "m:", "g:",
              "r--", "b--", "m--"]

    for i, ax in enumerate(axes):
        ax2 = ax.twinx()
        ms_df, um_df = mean_scores[i], unmatched[i]
        for label, style in zip(labels, styles):
            if label in ms_df.columns:
                ms_df[label].plot(ax=ax, style=style)
            elif label in um_df.columns:
                um_df[label].plot(ax=ax2, style=style)
        ax.set_title("%s-matching" % (match_methods[i]))

        # Shrink the current axes by 25% so the legends fit on the right
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])
        ax2.set_position([box.x0, box.y0, box.width * 0.75, box.height])
        # Put the legends to the right of the current axes
        ax.legend(loc='lower left', bbox_to_anchor=(1.3, 0.5))
        ax2.legend(loc='upper left', bbox_to_anchor=(1.3, 0.5))

    fh.text(0.5, 0.04, "# of components", ha="center")
    fh.text(0.05, 0.5, "mean %s scores" % scoring, va='center', rotation='vertical')
    fh.text(0.95, 0.5, "# of unmatched R- or L- components", va='center', rotation=-90)

    out_path = op.join(out_dir, '%s_simscores.png' % scoring)
    save_and_close(out_path, fh=fh)
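# The function below is a minimal, self-contained sketch of the dual-axis layout
# used in main_ic_loop: dissimilarity scores on the left y-axis, unmatched counts
# on the right. It is not called by the pipeline and the data are made up.
def _demo_twin_axis_plot():
    import matplotlib.pyplot as plt
    import pandas as pd

    components = [10, 20, 30]
    ms_df = pd.DataFrame({"wb vs R-unforced": [0.40, 0.35, 0.30]}, index=components)
    um_df = pd.DataFrame({"unmatched R": [1, 3, 5]}, index=components)

    fig, ax = plt.subplots(figsize=(6, 4))
    ax2 = ax.twinx()                              # second y-axis sharing the x-axis
    ms_df.plot(ax=ax, style="r-", legend=False)   # scores on the left axis
    um_df.plot(ax=ax2, style="r--", legend=False) # unmatched counts on the right axis
    ax.set_xlabel("# of components")
    ax.set_ylabel("mean dissimilarity score")
    ax2.set_ylabel("# of unmatched components")
    ax.legend(loc="upper left")
    ax2.legend(loc="upper right")
    return fig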