Пример #1
0
 def plot(mod=""):
     """Plot the sorted distances as a line and return their data frame.

     Args:
         mod (str): Modifier appended to the ``knn_dist`` name given to
             the line plot; defaults to an empty string.

     Returns:
         :obj:`pd.DataFrame`: Data frame with a ``point`` index column and
         a ``dist`` column holding the displayed distances.

     """
     dist_df = pd.DataFrame({
         "point": np.arange(len(dist_disp)),
         "dist": dist_disp,
     })
     plot_name = "knn_dist{}".format(mod)
     plot_title = config.plot_labels[config.PlotLabels.TITLE]
     plot_2d.plot_lines(
         plot_name, "point", ("dist", ), df=dist_df, show=show,
         title=plot_title)
     return dist_df
Пример #2
0
def plot_unlabeled_hemisphere(path, cols, size=None, show=True):
    """Generate line and bar plots of unlabeled hemisphere fractions.

    Each column in ``cols`` is plotted twice, once as lines and once as
    bars.

    Args:
        path (str): Path to the CSV file read into a data frame.
        cols (List[str]): Sequence of columns to plot.
        size (List[int]): Sequence of ``width, height`` passed to the line
            plots; defaults to None. The bar plots are always given None.
        show (bool): True to display the image; defaults to True.

    """
    y_label = "Fraction of hemisphere unlabeled"

    # read the table and add an age column mapped from the sample names
    df = pd.read_csv(path)
    sample_ages = ontology.rel_to_abs_ages(df["Sample"].unique())
    df["Age"] = df["Sample"].map(sample_ages)
    conds = df["Condition"].unique()

    # each metric gets its own pair of graphs
    for col in cols:
        title = "{}".format(col).replace("_", " ")

        # line plot from the data frame pivoted on region
        df_lines, regions = df_io.pivot_with_conditions(
            df, ["Age", "Condition"], "Region", col)
        plot_2d.plot_lines(
            config.filename, "Age", regions, linestyles=("--", "-"),
            labels=(y_label, "Post-Conceptional Age"), title=title,
            size=size, show=show, ignore_invis=True,
            suffix="_{}".format(col), df=df_lines, groups=conds)

        # bar plot, where each condition's values are spread into their
        # own column
        df_bars = df.pivot(
            index="Sample", columns="Condition", values=col).reset_index()
        bars_prefix = "{}_{}".format(
            os.path.splitext(config.filename)[0], col)
        # NOTE(review): the bar plot receives ``size=None`` rather than
        # ``size`` -- confirm that this is intended
        plot_2d.plot_bars(
            config.filename, conds, col_groups="Sample", y_label=y_label,
            title=title, size=None, show=show, df=df_bars,
            prefix=bars_prefix)
Пример #3
0
def plot_knns(img_paths, suffix=None, show=False, names=None):
    """Plot k-nearest-neighbor distances for multiple sets of blobs,
    overlaying on a single plot.

    The neighbor number is taken from the cluster settings in the atlas
    profile, falling back to the DBSCAN minimum points setting minus one
    if no neighbor number is set. Distances from all successfully loaded
    images are combined into one data frame per zoom level, saved to CSV,
    and plotted.

    Args:
        img_paths (List[str]): Base paths from which registered labels and
            blobs files will be found.
        suffix (str): Suffix inserted before the extension of each image
            path when loading the registered labels image; defaults to
            None.
        show (bool): True to plot the distances; defaults to False.
        names (List[str]): Sequence of names corresponding to ``img_paths``
            for the plot legend. Defaults to None, in which case the
            base name of each (suffixed) image path is used. The same
            fallback applies to any image without a corresponding name.

    """
    cluster_settings = config.atlas_profile[profiles.RegKeys.METRICS_CLUSTER]
    knn_n = cluster_settings[profiles.RegKeys.KNN_N]
    if not knn_n:
        knn_n = cluster_settings[profiles.RegKeys.DBSCAN_MINPTS] - 1
    print("Calculating k-nearest-neighbor distances and plotting distances "
          "for neighbor {}".format(knn_n))

    # set up combined data frames for all samples at each zoom level
    df_keys = ("ov", "zoom")
    dfs_comb = {key: [] for key in df_keys}

    # legend names are collected only for images that are actually loaded
    # so that they stay aligned with the distance columns when an image
    # is skipped
    names_disp = []
    for i, img_path in enumerate(img_paths):
        # load blobs associated with image
        mod_path = img_path
        if suffix is not None:
            mod_path = libmag.insert_before_ext(img_path, suffix)
        labels_img_np = sitk_io.load_registered_img(
            mod_path, config.RegNames.IMG_LABELS.value)
        blobs = detector.Blobs().load_blobs(np_io.img_to_blobs_path(img_path))
        _, res = np_io.find_scaling(img_path, labels_img_np.shape)
        if blobs is None:
            # format the path into the message itself so that it is not
            # passed as a separate positional argument to the warning helper
            libmag.warn(
                "unable to load nuclei coordinates for {}".format(img_path))
            continue

        # convert to physical units and display k-nearest-neighbors for nuclei
        blobs_phys = np.multiply(blobs.blobs[:, :3], res)
        _, _, dfs = knn_dist(blobs_phys, knn_n, 2, 1000000, False)
        if names and i < len(names):
            names_disp.append(names[i])
        else:
            # default to naming from filename
            names_disp.append(os.path.basename(mod_path))
        for key, df in zip(df_keys, dfs):
            dfs_comb[key].append(df)

    for key, dfs in dfs_comb.items():
        if not dfs:
            # no image provided distances at this zoom level
            libmag.warn(
                "no k-nearest-neighbor distances to combine for {}"
                .format(key))
            continue

        # combine data frames at each zoom level, save, and plot with
        # different colors for each image
        df = df_io.join_dfs(dfs, "point")
        dist_cols = [col for col in df.columns if col.startswith("dist")]
        rename_cols = dict(zip(dist_cols, names_disp))
        df = df.rename(rename_cols, axis=1)
        out_path = "knn_dist_combine_{}".format(key)
        df_io.data_frames_to_csv(df, out_path)
        plot_2d.plot_lines(out_path,
                           "point",
                           list(rename_cols.values()),
                           df=df,
                           show=show,
                           title=config.plot_labels[config.PlotLabels.TITLE])
Пример #4
0
def plot_region_development(metric, size=None, show=True):
    """Plot regions across development for the given metric.

    Three kinds of line plots are generated: the metric aggregated for
    brain tissue, the raw metric for the regions at each level, and the
    metric at each level normalized to the brain tissue values.

    The volume stats are read from ``config.filename`` and the region
    names and levels from ``config.filenames[1]``.

    Args:
        metric (str): Column name of metric to track.
        size (List[int]): Sequence of ``width, height`` to size the figure;
            defaults to None.
        show (bool): True to display the image; defaults to True.

    """
    # set up access to data frame columns
    id_cols = ["Age", "Condition"]
    extra_cols = ["RegionName"]
    cond_col = "Region"

    # assume that vol stats file is given first, then region IDs;
    # merge in region names and levels
    df_regions = pd.read_csv(config.filenames[1])
    df = pd.read_csv(config.filename).merge(
        df_regions[["Region", "RegionName", "Level"]], on="Region", how="left")

    # convert sample names to ages
    ages = ontology.rel_to_abs_ages(df["Sample"].unique())
    df["Age"] = df["Sample"].map(ages)

    # get large super-structures for normalization to brain tissue, where
    # "non-brain" are spinal cord and ventricles, which are variably labeled
    df_base = df[df["Region"] == 15564]
    ids_nonbr_large = (17651, 126651558)
    dfs_nonbr_large = [df[df["Region"] == n] for n in ids_nonbr_large]

    # get data frame with region IDs of all non-brain structures removed
    labels_ref_lookup = ontology.LabelsRef(
        config.load_labels).load().ref_lookup
    ids_nonbr = []
    for n in ids_nonbr_large:
        ids_nonbr.extend(ontology.get_children_from_id(labels_ref_lookup, n))

    label_id = config.atlas_labels[config.AtlasLabels.ID]
    if label_id is not None:
        # show only selected region and its children
        ids = ontology.get_children_from_id(labels_ref_lookup, label_id)
        df = df[np.isin(df["Region"], ids)]
    df_brain = df.loc[~df["Region"].isin(ids_nonbr)]

    levels = np.sort(df["Level"].unique())
    conds = df["Condition"].unique()

    # get aggregated whole brain tissue for normalization
    cols_show = (*id_cols, cond_col, *extra_cols, metric)
    if dfs_nonbr_large:
        # add all large non-brain structures
        df_nonbr = dfs_nonbr_large[0]
        for df_out in dfs_nonbr_large[1:]:
            df_nonbr = df_io.normalize_df(df_nonbr, id_cols, cond_col, None,
                                          [metric], extra_cols, df_out,
                                          df_io.df_add)
        # subtract them from whole organism to get brain tissue alone,
        # updating given metric in df_base
        df_base = df_io.normalize_df(df_base, id_cols, cond_col, None,
                                     [metric], extra_cols, df_nonbr,
                                     df_io.df_subtract)
    df_base.loc[:, "RegionName"] = "Brain tissue"
    print("Brain {}:".format(metric))
    df_io.print_data_frame(df_base.loc[:, cols_show], "\t")
    df_base_piv, regions = df_io.pivot_with_conditions(df_base, id_cols,
                                                       "RegionName", metric)

    # plot lines with separate styles for each condition and colors for
    # each region name
    linestyles = ("--", "-.", ":", "-")
    num_conds = len(conds)
    # repeat the styles enough times to cover every condition, using
    # ceiling division so that the count is never short of the number of
    # conditions, and keeping at least one full set of styles
    num_reps = max(1, -(-num_conds // len(linestyles)))
    linestyles = linestyles * num_reps
    if num_conds < len(linestyles):
        # trim to the number of conditions while keeping the final, solid
        # style for the last condition; with a single condition, only the
        # solid style remains
        linestyles = (*linestyles[:num_conds - 1], linestyles[-1])
    lines_params = {
        "labels": (metric, "Post-Conceptional Age"),
        "linestyles": linestyles,
        "size": size,
        "show": show,
        "ignore_invis": True,
        "groups": conds,
        "marker": ".",
    }
    # normalized plots share all parameters except the axis labels
    line_params_norm = lines_params.copy()
    line_params_norm["labels"] = ("Fraction", "Post-Conceptional Age")
    plot_2d.plot_lines(config.filename,
                       "Age",
                       regions,
                       title="Whole Brain Development ({})".format(metric),
                       suffix="_dev_{}_brain".format(metric),
                       df=df_base_piv,
                       **lines_params)

    for level in levels:
        # plot raw metric at given level
        df_level = df.loc[df["Level"] == level]
        print("Raw {}:".format(metric))
        df_io.print_data_frame(df_level.loc[:, cols_show], "\t")
        df_level_piv, regions = df_io.pivot_with_conditions(
            df_level, id_cols, "RegionName", metric)
        plot_2d.plot_lines(config.filename,
                           "Age",
                           regions,
                           title="Structure Development ({}, Level {})".format(
                               metric, level),
                           suffix="_dev_{}_level{}".format(metric, level),
                           df=df_level_piv,
                           **lines_params)

        # plot metric normalized to whole brain tissue; structures
        # above removed regions will still contain them
        df_brain_level = df_brain.loc[df_brain["Level"] == level]
        df_norm = df_io.normalize_df(df_brain_level, id_cols, cond_col, None,
                                     [metric], extra_cols, df_base)
        print("{} normalized to whole brain:".format(metric))
        df_io.print_data_frame(df_norm.loc[:, cols_show], "\t")
        df_norm_piv, regions = df_io.pivot_with_conditions(
            df_norm, id_cols, "RegionName", metric)
        plot_2d.plot_lines(
            config.filename,
            "Age",
            regions,
            units=(None, config.plot_labels[config.PlotLabels.X_UNIT]),
            title=("Structure Development Normalized to Whole "
                   "Brain ({}, Level {})".format(metric, level)),
            suffix="_dev_{}_level{}_norm".format(metric, level),
            df=df_norm_piv,
            **line_params_norm)