示例#1
0
def load_or_generate_summary(images, term_scores, n_components, scoring, dataset,
                             force=False, sparsityThreshold=0.000005,
                             memory=None):
    """
    Load the summary tables for a given n_components, or build and save them.

    The three summary csvs live in
    ``ica_imgs/<dataset>/analyses/<n_components>``. If all of them exist and
    ``force`` is False they are simply read back. Otherwise the main analysis
    is run with "wb" matching, the summaries are rebuilt from its results and
    written to that directory.

    Parameters
    ----------
    images, term_scores
        Forwarded unchanged to ``do_main_analysis``.
    n_components : int
        Number of components; each generated summary has this many rows.
    scoring
        Forwarded to ``do_main_analysis`` and ``compare_components``.
    dataset : str
        Dataset name, used to build the output directory.
    force : bool
        If True, ignore any saved csvs and recompute. Also forwarded to
        ``do_main_analysis``.
    sparsityThreshold : float
        Threshold forwarded to ``getHemiSparsity``.
    memory : Memory or None
        Cache object forwarded to ``getHemiSparsity``. When None (default),
        ``Memory(cachedir='nilearn_cache')`` is created on demand, only if
        the summaries actually have to be generated.

    Returns
    -------
    (wb_summary, R_sparsity, L_sparsity), each of which is a DataFrame.
    """
    # Directory to find or save the summary csvs
    out_dir = op.join('ica_imgs', dataset, 'analyses', str(n_components))
    summary_csvs = ["wb_summary.csv", "R_sparsity.csv", "L_sparsity.csv"]
    csv_paths = [op.join(out_dir, csv) for csv in summary_csvs]

    # If summary data are already saved as csv files, simply load them
    if not force and all(op.exists(path) for path in csv_paths):
        print("Loading summary data from %s" % out_dir)
        # The csvs are written below with the DataFrame index as their first
        # column; read it back as the index so that loaded and freshly
        # generated summaries have the same columns.
        wb_summary, R_sparsity, L_sparsity = [
            pd.read_csv(path, index_col=0) for path in csv_paths]
        return (wb_summary, R_sparsity, L_sparsity)

    # Otherwise run the main analysis and save the summaries as csv files
    if memory is None:
        memory = Memory(cachedir='nilearn_cache')

    # Initialize summary DFs (three independent frames)
    wb_summary, R_sparsity, L_sparsity = [
        pd.DataFrame({"n_comp": [n_components] * n_components})
        for _ in range(3)]
    if not op.exists(out_dir):
        os.makedirs(out_dir)

    # Use wb matching in main analysis to get component images and
    # matching scores
    match_method = 'wb'
    img_d, score_mats_d, sign_mats_d = do_main_analysis(
        dataset=dataset, images=images, term_scores=term_scores,
        key=match_method, force=force, plot=False,
        n_components=n_components, scoring=scoring)

    # 1) Get sparsity for each hemisphere for "wb", "R" and "L" imgs
    hemis = ("R", "L")
    sparsitySigns = ("pos", "neg", "abs")
    # (image key, DF receiving the results, hemisphere labels to compute)
    sparsity_targets = (("wb", wb_summary, hemis),
                        ("R", R_sparsity, ("R",)),
                        ("L", L_sparsity, ("L",)))
    for key, df, labels in sparsity_targets:
        # {label: (pos_arr, neg_arr, abs_arr)}
        sparsityResults = {
            label: getHemiSparsity(img_d[key], label,
                                   threshold=sparsityThreshold, memory=memory)
            for label in labels}

        for i, sign in enumerate(sparsitySigns):
            for label in labels:
                df["%s_%s" % (sign, label)] = sparsityResults[label][i]
            # For wb only, also compute Total sparsity and HPI
            if key == "wb":
                df["%sTotal" % sign] = df["%s_R" % sign] + df["%s_L" % sign]
                df["%sHPI" % sign] = ((df["%s_R" % sign] - df["%s_L" % sign]) /
                                      df["%sTotal" % sign].astype(float))

    # Save R/L_sparsity DFs
    R_sparsity.to_csv(op.join(out_dir, "R_sparsity.csv"))
    L_sparsity.to_csv(op.join(out_dir, "L_sparsity.csv"))

    # 2) Get SAS of wb component images as well as matched RL images by passing
    # 2 x wb or RL images and hemi labels to the compare_components (make sure
    # not to flip when comparing R and L)
    name_img_pairs = [("wb_SAS", img_d["wb"]),
                      ("matchedRL_SAS", img_d["RL"])]
    for (name, img) in name_img_pairs:
        sas_imgs = [img] * 2
        score_mat, sign_mat = compare_components(sas_imgs, hemis, scoring,
                                                 flip=False)
        # we only care about the diagonal in score_mat
        wb_summary[name] = score_mat.diagonal()

    # 3) Finally store indices of matched R, L, and RL components, and the
    # respective match scores against wb
    comparisons = [('wb', 'R'), ('wb', 'L'), ('wb', 'RL')]
    for comparison in comparisons:
        score_mat, sign_mat = score_mats_d[comparison], sign_mats_d[comparison]
        matched, unmatched = get_match_idx_pair(score_mat, sign_mat)
        # Component indices for matched R, L , RL are in matched[1].
        # Multiply it by matched[2], which stores sign flipping info.
        matched_indices = matched[1] * matched[2]
        wb_summary["matched%s" % comparison[1]] = matched_indices

        matched_scores = score_mat[matched[0], matched[1]]
        wb_summary["match%s_score" % comparison[1]] = matched_scores

    # Save wb_summary once, after every column has been filled in, so that a
    # failure part-way through never leaves a partial csv that a later call
    # would load as if it were complete.
    wb_summary.to_csv(op.join(out_dir, "wb_summary.csv"))

    return (wb_summary, R_sparsity, L_sparsity)
示例#2
0
def main_ic_loop(components, scoring,
                 dataset, query_server=True, force=False,
                 memory=None, **kwargs):
    """
    Run the main analysis over several component counts and plot the scores.

    For each matching method and each entry of ``components``,
    ``do_main_analysis`` is run and, for every comparison it returns, the mean
    score of the best-match pairs (forced and unforced matching) and the
    number of unmatched components (unforced matching only) are collected.
    Per matching method the values are saved to
    ``ica_imgs/<dataset>/<method>-matching_simscores.csv``; a figure with one
    subplot per matching method is saved to
    ``ica_imgs/<dataset>/<scoring>_simscores.png``.

    Parameters
    ----------
    components : sequence of int
        Numbers of components to analyse; used as the index of the result
        tables.
    scoring
        Forwarded to ``do_main_analysis``; also used in the figure label and
        the output file name.
    dataset : str
        Dataset name, forwarded to ``get_dataset`` / ``do_main_analysis`` and
        used to build the output directory.
    query_server : bool
        Forwarded to ``get_dataset``.
    force : bool
        Forwarded to ``do_main_analysis`` as both ``force`` and ``plot``.
    memory
        Not used by this function; accepted for backward compatibility.
    **kwargs
        Forwarded to ``do_main_analysis``.

    Raises
    ------
    ValueError
        If a comparison name contains "forced" but is neither "-forced" nor
        "-unforced".
    """
    # TODO: test with just 'wb' and 'rl' matching until 'lr' matching is fixed
    # match_methods = ['wb', 'rl', 'lr']
    match_methods = ['wb', 'rl']
    out_dir = op.join('ica_imgs', dataset)
    mean_scores, unmatched = [], []

    # Get the data once.
    images, term_scores = get_dataset(
        dataset, query_server=query_server)

    for match_method in match_methods:
        print("Plotting results for %s matching method" % match_method)
        mean_score_d, num_unmatched_d = {}, {}
        for c in components:
            print("Running analysis with %d components" % c)
            # main analysis is run for each component and match method:
            # plotting for component comparisons are done only if force=True
            _, score_mats_d, sign_mats_d = do_main_analysis(
                dataset=dataset, images=images, term_scores=term_scores,
                key=match_method, force=force, plot=force,
                n_components=c, scoring=scoring, **kwargs)

            # Get mean dissimilarity scores and number of unmatched for each
            # comparison in score_mats_d
            for comp in score_mats_d:
                score_mat, sign_mat = score_mats_d[comp], sign_mats_d[comp]
                pair_label = " vs ".join(comp)

                # Each case is (force_match, score label, unmatched label);
                # the unmatched label is only meaningful for unforced matching.
                if "forced" in comp[1]:
                    # For ("wb", "RL-forced") and ("wb", "RL-unforced") the
                    # forcing mode is already part of the comparison name.
                    if "-forced" in comp[1]:
                        cases = [(True, pair_label, None)]
                    elif "-unforced" in comp[1]:
                        cases = [(False, pair_label, "unmatched RL")]
                    else:
                        raise ValueError(
                            "Cannot tell forced from unforced matching for "
                            "comparison %r" % (comp,))
                else:
                    # For ("wb", "R"), ("wb", "L") --wb matching or
                    # ("R", "L") --rl matching: compute both modes.
                    cases = [(True, pair_label + "-forced", None),
                             (False, pair_label + "-unforced",
                              "unmatched %s" % comp[1])]

                for force_match, score_label, um_label in cases:
                    match, unmatch = get_match_idx_pair(
                        score_mat, sign_mat, force=force_match)
                    # Index with a (rows, cols) tuple to pick the paired
                    # entries; a list of index arrays is not interpreted as a
                    # tuple by current NumPy and would select whole rows.
                    mean_score = score_mat[match["idx"][0],
                                           match["idx"][1]].mean()
                    mean_score_d.setdefault(score_label, []).append(mean_score)
                    if not force_match:
                        if unmatch["idx"] is not None:
                            n_unmatched = unmatch["idx"].shape[1]
                        else:
                            n_unmatched = 0
                        num_unmatched_d.setdefault(um_label, []).append(
                            n_unmatched)

        # Store vals as df
        ms_df = pd.DataFrame(mean_score_d, index=components)
        um_df = pd.DataFrame(num_unmatched_d, index=components)
        mean_scores.append(ms_df)
        unmatched.append(um_df)
        # Save combined df
        combined = pd.concat([ms_df, um_df], axis=1)
        out = op.join(out_dir, '%s-matching_simscores.csv' % match_method)
        combined.to_csv(out)

    # We have all the scores for the matching method; now plot.
    # squeeze=False always yields a 2D array of axes, so the loop below also
    # works if only one matching method is listed.
    fh, axes = plt.subplots(1, len(match_methods), sharex=True, sharey=True,
                            figsize=(18, 6), squeeze=False)
    fh.suptitle("Average dissimilarity scores for the best-match pairs", fontsize=16)
    labels = ["wb vs R-unforced", "wb vs L-unforced", "R vs L-unforced", "wb vs RL-unforced",
              "wb vs R-forced", "wb vs L-forced", "R vs L-forced", "wb vs RL-forced",
              "unmatched R", "unmatched L", "unmatched RL"]
    styles = ["r-", "b-", "m-", "g-",
              "r:", "b:", "m:", "g:",
              "r--", "b--", "m--"]

    for i, ax in enumerate(axes[0]):
        # Scores go on the left axis, unmatched counts on a twin right axis
        ax2 = ax.twinx()
        ms_df, um_df = mean_scores[i], unmatched[i]
        for label, style in zip(labels, styles):
            if label in ms_df.columns:
                ms_df[label].plot(ax=ax, style=style)
            elif label in um_df.columns:
                um_df[label].plot(ax=ax2, style=style)
        ax.set_title("%s-matching" % (match_methods[i]))
        # Shrink current axis to 75% of its width to make room for legends
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])
        ax2.set_position([box.x0, box.y0, box.width * 0.75, box.height])
        # Put the legends to the right of the current axis
        ax.legend(loc='lower left', bbox_to_anchor=(1.3, 0.5))
        ax2.legend(loc='upper left', bbox_to_anchor=(1.3, 0.5))
    fh.text(0.5, 0.04, "# of components", ha="center")
    fh.text(0.05, 0.5, "mean %s scores" % scoring, va='center', rotation='vertical')
    fh.text(0.95, 0.5, "# of unmatched R- or L- components", va='center', rotation=-90)

    out_path = op.join(out_dir, '%s_simscores.png' % scoring)
    save_and_close(out_path, fh=fh)