Python get_filedb示例，lib.filedb.get_filedb Python示例

示例#1

0

显示文件

def main():
    """Export per-strain gradient data for the LSM700 63x sigB dataset.

    Reads ``single_cell_data.h5`` and ``file_list.tsv`` from
    ``../datasets/LSM700_63x_sigb`` (relative to this script), adds a
    ``ratio`` column computed as ``green_raw_bg_mean / red_raw_bg_mean`` and,
    for every (strain, name) pair, calls ``save_indiv_mean_data`` and
    ``save_error_bar_data`` with the ``gradients`` output directory.
    """
    this_dir = os.path.dirname(__file__)
    base = os.path.join(this_dir, "../datasets/LSM700_63x_sigb")
    alldf = pd.read_hdf(os.path.join(base, "single_cell_data.h5"))
    files = filedb.get_filedb(os.path.join(base, "file_list.tsv"))
    alldf["ratio"] = alldf["green_raw_bg_mean"] / alldf["red_raw_bg_mean"]

    # (column, label) pairs handed to the save helpers.  Only the ratio is
    # exported; alternative pairs that can be listed here instead are
    #   ("green_raw_bg_mean", "unnormed"),
    #   ("green_raw_bg_maxnorm", "maxnorm"),   # normalized by RFP gradient peak
    #   ("green_raw_bg_meannorm", "meannorm").
    chans = [("ratio", "ratio")]

    # exist_ok makes re-runs a no-op, while a plain file squatting on the
    # path is still reported instead of being silently ignored.
    save_path = os.path.join(base, "gradients")
    os.makedirs(save_path, exist_ok=True)

    strains = [
        ("jlb021", "WT"),
        ("jlb088", "delRU"),
        ("jlb039", "delQP"),
        ("jlb095", "2xQP"),
    ]
    for strain, name in strains:
        save_indiv_mean_data(files, alldf, chans, strain, name, save_path)
        save_error_bar_data(files, alldf, chans, strain, name, save_path)

示例#2

0

显示文件

文件： subfig_sigb_grad.py 项目： npmurphy/biofilm_pulse

def main():
    """Show the sigB gradient figure for the 10x cryoslice dataset.

    Loads the ``data`` table from ``gradient_summary.h5``, adds a ``ratio``
    column (``mean_green / mean_red``), loads the file database and passes
    both to ``get_figure`` together with a fresh axes before displaying it.
    """
    dataset_root = "../../datasets/biofilm_cryoslice/LSM780_10x_sigb/"

    summary_file = os.path.join(dataset_root, "gradient_summary.h5")
    gradients = pd.read_hdf(summary_file, "data")
    gradients["ratio"] = gradients["mean_green"] / gradients["mean_red"]

    file_table = filedb.get_filedb(os.path.join(dataset_root, "filedb.tsv"))

    # The figure handle and the axes returned by get_figure are not needed:
    # plt.show() displays whatever is currently open.
    _, axis = plt.subplots()
    get_figure(axis, file_table, gradients)
    plt.show()

示例#3

0

显示文件

def main():
    """Overlay the per-strain histograms on a single axes and show them.

    The cell table is read from the cached ``histo_tops_normed.h5`` unless
    ``regenerate_subset`` is switched on, in which case the full
    ``single_cell_data.h5`` table is loaded and passed to ``get_data_subset``
    together with the cache path.
    """
    here = os.path.dirname(__file__)
    dataset_rel = "../../datasets/LSM700_63x_sigb"

    def dataset_file(filename):
        # Path of a file inside the dataset directory, relative to this script.
        return os.path.join(here, dataset_rel, filename)

    file_df = filedb.get_filedb(dataset_file("file_list.tsv"))
    cache_file = dataset_file("histo_tops_normed.h5")

    regenerate_subset = False

    green = "green_raw_bg_mean"
    red = "red_raw_bg_mean"
    bin_count = 100
    slice_bounds = (5, 7)
    timepoint = 48
    region = "center"
    green_max = 1

    green_bins = np.linspace(0, green_max, bin_count)

    # (strain key, legend label, key into strain_color) for each histogram.
    labelled_strains = [
        ("wt_sigar_sigby", "WT P$_{sigB}$-YFP", "JLB021"),
        ("delqp_sigar_sigby", "del qp", "JLB039"),
        ("delru_sigar_sigby", "del ru", "JLB088"),
    ]
    histo_specs = [
        (key, green, red, green_bins, slice_bounds, label, strain_color[code])
        for key, label, code in labelled_strains
    ]

    # Every histogram is drawn on the same axes object.
    _, histo_ax = plt.subplots(1, 1)
    axes = [histo_ax for _ in histo_specs]

    if regenerate_subset:
        all_cells = pd.read_hdf(dataset_file("single_cell_data.h5"), "cells")
        cellsdf = get_data_subset(all_cells, file_df, histo_specs, timepoint,
                                  region, cache_file)
    else:
        cellsdf = pd.read_hdf(cache_file, "cells")

    axes = get_figure(cellsdf, file_df, axes, timepoint, region, histo_specs)
    axes[0].legend()
    plt.show()

示例#4

0

显示文件

def main():
    """Show the spore and cell panels for the three sspB strains.

    Two stacked axes are created; ``get_figure`` is called once per panel
    with the kind ``"spore"`` (top) and ``"cell"`` (bottom).
    """
    dataset_root = "../../datasets/biofilm_cryoslice/LSM700_63x_sspb_giant/"
    kd_dir = os.path.join(dataset_root, "kd_spore_cell")
    # dataset_root ends with a separator, so join() yields the same string as
    # plain concatenation.
    file_df = filedb.get_filedb(os.path.join(dataset_root, "file_list.tsv"))

    # (strain id, display label)
    strains = [
        ('JLB077', "WT"),
        ('JLB118', "ΔσB"),
        ('JLB117', "2×rsbQP"),
    ]

    _, panels = plt.subplots(2, 1)
    for row, kind in enumerate(("spore", "cell")):
        panels[row] = get_figure(panels[row], kd_dir, file_df, strains, kind)
    panels[0].set_xlabel("Distance from air interface (μm)")
    plt.show()

示例#5

0

显示文件

文件： summarize_cell_spore_counts.py 项目： npmurphy/biofilm_pulse

def main():
    """Command-line entry point: summarise cell/spore counts per image.

    Two modes, selected by ``--data_files_to_recompute_area_cache``:

    * flag given: ``cache_pixel_area`` is called with the file table, the
      ``--area_cache`` path and the flag's value, then the function returns.
    * flag absent: the spore and cell tables are read, cells overlapping
      spores are dropped, the area cache is loaded with
      ``scipy.io.loadmat`` and the result of ``cache_indiv_cell_spore`` is
      written to ``--out_file`` as a tab-separated file.

    ``--individual_files`` and ``--mean_finals`` are parsed but not used by
    the code in this function.

    Returns:
        0 once either mode has finished.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--file_db")
    parser.add_argument("--spore_db")
    parser.add_argument("--cell_db")
    parser.add_argument("--area_cache")
    parser.add_argument("--data_files_to_recompute_area_cache", type=str)
    parser.add_argument("--individual_files",
                        action="store_true",
                        default=False)
    parser.add_argument("--mean_finals", action="store_true", default=False)
    parser.add_argument("--out_file")
    args = parser.parse_args()

    recompute_cache = bool(args.data_files_to_recompute_area_cache)
    if not recompute_cache:
        # Fail before any loading starts.  In particular an unset --out_file
        # would make DataFrame.to_csv(None) return the text and write no
        # file, discarding the whole computation without any error.
        needed = ("spore_db", "cell_db", "area_cache", "out_file")
        missing = ["--" + opt for opt in needed if getattr(args, opt) is None]
        if missing:
            parser.error("missing option(s) needed to summarise counts: " +
                         ", ".join(missing))

    file_df = filedb.get_filedb(args.file_db)

    # This image is not representative
    unrepresentative = ((file_df["name"] == "JLB077_48hrs_center_1_1") &
                        (file_df["dirname"] == "Batch1"))
    file_df = file_df[~unrepresentative]

    if recompute_cache:
        cache_pixel_area(file_df, args.area_cache,
                         args.data_files_to_recompute_area_cache)
        print(
            "done caching pixel witdths, exiting, run again with out the data_files_to_recompute_area_cache flag"
        )
        return 0

    print("loading the hd5 files")
    spore_df = pd.read_hdf(args.spore_db, "spores")
    cell_df = pd.read_hdf(args.cell_db, "cells")
    # remove cells that overlap spores
    cell_df = cell_df[~cell_df["spore_overlap"]].copy()
    distance_cache = scipy.io.loadmat(args.area_cache)

    print("ready to work")

    cached = cache_indiv_cell_spore(file_df, spore_df, cell_df, distance_cache)
    cached.to_csv(args.out_file, sep="\t", index_label="index")
    return 0

示例#6

0

显示文件

文件： subfig_spore_count_gradient.py 项目： npmurphy/biofilm_pulse

def main():
    """Show spore, cell and total count gradients for the sspB strains.

    Loads the file database and ``spore_cell_individual.tsv`` from the
    ``LSM700_63x_sspb_giant`` dataset directory (relative to this script),
    drops one image from the file table and calls ``get_figure`` once per
    (strain, channel) pair, with each channel on its own row of a 3x1 grid.
    """
    import pandas as pd
    import lib.filedb as filedb
    import os.path

    base = os.path.join(os.path.dirname(__file__),
                        "../../datasets/LSM700_63x_sspb_giant/")

    file_df = filedb.get_filedb(os.path.join(base, "file_list.tsv"))
    excluded = ((file_df["name"] == "JLB077_48hrs_center_1_1")
                & (file_df["dirname"] == "Batch1"))
    file_df = file_df[~excluded]
    individual = pd.read_csv(os.path.join(base, "spore_cell_individual.tsv"),
                             sep="\t",
                             index_col="index")

    fig, ax = plt.subplots(3, 1)

    # (strain id, display label).  The label holding a TeX command is a raw
    # string: "\m" is not a valid escape sequence in a normal literal.
    sspb_strains = [
        ("JLB077", "WT"),
        ("JLB117", r"2x$\mathit{rsbQP}$"),
        ("JLB118", "ΔσB"),
    ]

    # Column to plot -> row of the subplot grid it is drawn on.
    chan_axes = {
        "area_norm_spore_counts": 0,
        "area_norm_cell_counts": 1,
        "area_norm_total_counts": 2,
    }

    # Some images had tiny regions at the end with <10 cells/spores in them
    # that produced huge spikes of 100% spores etc.
    # To ignore these, 100 is used as a minimum sample size.
    # 10 does the job; 500 and 100 look good at the top but introduce more
    # artifacts later.  100 is just a big enough number.
    min_sample_size = 100

    for strain, _ in sspb_strains:
        for chan, row in chan_axes.items():
            ax[row] = get_figure(ax[row], file_df, individual, strain, chan,
                                 min_sample_size)
    plt.show()

示例#7

0

显示文件

文件： figure_spore_grad_comp.py 项目： npmurphy/biofilm_pulse

# Script fragment: builds the spore-density panel of a two-row figure.
# spcount_ax = plt.subplot(gs[1])
# cellcount_ax = plt.subplot(gs[2])
fig, ax = plt.subplots(2, 1)

# Top axes: spore density; bottom axes: cell density.
spcount_ax = ax[0]
cellcount_ax = ax[1]

# Position handed to set_label_coords() for the y-axis label below.
ylabel_cord = (-0.07, 0.5)

# (strain id, label looked up in figure_util.strain_label)
sspb_strains = [(st, figure_util.strain_label[st])
                for st in ['JLB077', 'JLB117', 'JLB118']]

this_dir = os.path.dirname(__file__)
base = os.path.join(this_dir, "../../datasets/LSM700_63x_sspb_giant/")
datadir = os.path.join(base, "kd_spore_cell")
# base ends with a path separator, so plain concatenation yields a valid path.
file_df = filedb.get_filedb(base + "file_list.tsv")

###########
## Spore density
spcount_ax = subfig_density_gradient.get_figure(spcount_ax, datadir, file_df,
                                                sspb_strains, "spore")
spcount_ax.set_ylabel("Spore density")
spcount_ax.set_ylim(0, 0.0003)
# Scientific notation on the y axis for every magnitude (scilimits=(0, 0)).
spcount_ax.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
spcount_ax.get_yaxis().set_label_coords(*ylabel_cord)
leg = spcount_ax.legend()

###########
## cell density
cellcount_ax = subfig_density_gradient.get_figure(cellcount_ax, datadir,
                                                  file_df, sspb_strains,

示例#8

0

显示文件

文件： make_histograms.py 项目： npmurphy/biofilm_pulse

def main():
    """Assemble and save the ``demo_longtail`` figure.

    The figure is a 2x4 grid.  The top row holds one image panel per entry
    of the module-level ``image_list``, drawn by
    ``subfig_draw_bin.get_figure``.  The bottom row holds one histogram per
    entry of ``list_of_histos``, drawn by
    ``subfig_indivfile_histo.get_figure``.

    When the module-level ``USE_CACHE_PLOTS`` is false, the cell and file
    tables are read from ``basedir`` and filtered per strain; otherwise
    ``None`` is passed as the strain table.  The finished figure is handed
    to ``figure_util.save_figures``.

    NOTE(review): assumes ``image_list`` has at most four entries, one per
    column of the grid; confirm against its definition.
    """

    basedir = os.path.join(this_dir, "../../datasets/LSM700_63x_sigb")

    if not USE_CACHE_PLOTS:
        cell_df = pd.read_hdf(os.path.join(basedir, "single_cell_data.h5"),
                              "cells")
        file_df = filedb.get_filedb(os.path.join(basedir, "file_list.tsv"))
    else:
        file_df = None
        cell_df = None

    time = 48
    location = "center"
    slice_srt, slice_end = 5, 7  #10, 15
    #slice_srt, slice_end = 5, 6 #10, 15 #
    # There is no major difference between 5-6 and 7-8, just the QP skew is bigger in 5-6
    #slice_srt, slice_end = 7, 8 #10, 15
    # Moving to 2um because it makes the plots look nicer.

    # fig, ax = plt.subplots(4, 2)
    # axhisto = ax[:, 1]
    # aximage = ax[:, 0]
    fig, ax = plt.subplots(2, 4)
    axhisto = ax[1, :]
    aximage = ax[0, :]

    # Top row: one image panel per image_list entry; only the first panel
    # gets a scale bar.
    for i, (name, path, roi, chans) in enumerate(image_list):
        impath = os.path.join(image_base_dir, path)
        aximage[i] = subfig_draw_bin.get_figure(aximage[i],
                                                name,
                                                impath,
                                                roi,
                                                chans,
                                                FP_max_min,
                                                (slice_srt, slice_end),
                                                add_scale_bar=i == 0)
        aximage[i].set_title("")
        aximage[i].text(imgletter_lab[0],
                        imgletter_lab[1],
                        topletters[i],
                        transform=aximage[i].transAxes,
                        **letter_settings,
                        color="white")

    # Text anchor positions; none of these names is referenced again in this
    # function.
    text_x = 0.40
    text_top = 0.85
    line_sep = 0.15
    title_loc, cv_loc, samp_loc, cell_loc = [
        (text_x, text_top - (line_sep * i)) for i in range(4)
    ]

    strain_map, des_strain_map = strainmap.load()
    gchan = "green_raw_bg_meannorm"
    rchan = "red_raw_bg_meannorm"
    if not USE_CACHE_PLOTS:
        # Keep only cells with a positive red-channel value.
        cell_df = cell_df[cell_df[rchan] > 0].copy()

    #max_val = 30000
    #max_val = 1.0 #6.0 #20000
    #gmax_val = 1.0 #7.5
    max_val = 6.5  #2.5
    gmax_val = 6.5  #0.75
    nbins = 150
    # (lower limit, upper limit, number of bins) for each channel.
    rbins = (0, max_val, nbins)
    gbins = (0, gmax_val, nbins)
    percentile = 0  #99
    # (strain key, channel column, panel label, colour)
    list_of_histos = [("wt_sigar_sigby", rchan, "WT P$_{sigA}$-RFP",
                       figure_util.strain_color["JLB021"]),
                      ("wt_sigar_sigby", gchan, "WT P$_{sigB}$-YFP",
                       figure_util.strain_color["JLB021"]),
                      ("delqp_sigar_sigby", gchan, "ΔrsbQP P$_{sigB}$-YFP",
                       figure_util.strain_color["JLB039"]),
                      ("delru_sigar_sigby", gchan, "ΔrsbRU P$_{sigB}$-YFP",
                       figure_util.strain_color["JLB088"])]
    print("-----------")
    # Bottom row: one histogram panel per list_of_histos entry.
    for i, (strain, chan, label, color) in enumerate(list_of_histos):
        print(label)
        strain_df = None
        if not USE_CACHE_PLOTS:
            # File ids for this time point, location and strain, then the
            # cells that belong to those files.
            fids = file_df[(file_df["time"] == time)
                           & (file_df["location"] == location) &
                           (file_df["strain"] == des_strain_map[strain])].index
            strain_df = cell_df[cell_df["global_file_id"].isin(fids)]

        dset = time, location, strain
        plot_args = {"color": color, "max_min": "std", "mode_mean": False}
        # Channels whose name contains "red" use the red bins.
        tbins = gbins
        if "red" in chan:
            tbins = rbins

        args = (axhisto[i], strain_df, chan, tbins, (slice_srt, slice_end),
                dset, percentile, USE_CACHE_PLOTS, this_dir, plot_args)
        axhisto[i], _, meandmed = subfig_indivfile_histo.get_figure(*args)
        # Strain label, right-aligned at the top of the panel.
        axhisto[i].text(1.0,
                        hisletter_lab[1],
                        label,
                        horizontalalignment='right',
                        verticalalignment='top',
                        color="black",
                        fontsize=plt.rcParams["axes.titlesize"],
                        transform=axhisto[i].transAxes)

        # Panel letter.
        axhisto[i].text(hisletter_lab[0],
                        hisletter_lab[1],
                        letters[i],
                        transform=axhisto[i].transAxes,
                        **letter_settings)

    #leg = axhisto[0].legend(loc="center right")

    #axhisto[-1].set_xlabel("Mean normalised cell fluorecence")
    axhisto[0].set_ylabel("Percentage of cells")

    # The loop below sets the x limits of every histogram axes again,
    # including axhisto[0].
    axhisto[0].set_xlim(0, max_val)
    for a in np.ravel(axhisto):
        #a.set_ylabel("Percentage of cells")
        a.set_xlabel("Mean normalised cell fluorecence")
        a.set_ylim(0, 5)
        a.set_xlim(0, gmax_val)
        a.tick_params(axis='x', which='both',
                      direction='out')  #, length=2, pad=0)
        a.tick_params(axis='y', which='both',
                      direction='out')  #, length=2, pad=0)

    # for a in axhisto[:-1]:
    #     a.set_xticklabels([])
    # Only the left-most histogram keeps its y tick labels.
    for a in axhisto[1:]:
        a.set_yticklabels([])

    filename = "demo_longtail"
    #fig.subplots_adjust(left=000, ri0ht=0.98, top = 1.0, bottom=0.06, hspace=0.08, wspace=0.2)
    #width, height = figure_util.get_figsize(figure_util.fig_width_small_pt, wf=1.0, hf=1.7)
    fig.subplots_adjust(left=0.05,
                        right=0.99,
                        top=1.0,
                        bottom=0.10,
                        hspace=0.08,
                        wspace=0.15)
    width, height = figure_util.get_figsize(figure_util.fig_width_big_pt,
                                            wf=1.0,
                                            hf=0.5)
    fig.set_size_inches(width, height)
    figure_util.save_figures(fig, filename, ["png", "pdf"], this_dir)

示例#9

0

显示文件

文件： joy_plots_of_gradients.py 项目： npmurphy/biofilm_pulse

def main():
    """Show a grid of histogram curves, one column per strain/channel.

    For every entry of ``strains`` the data are gathered with
    ``get_strain_result`` and drawn with ``plot_curves`` once per statistic
    listed in ``plot_colors`` (one subplot row each).  A colour bar labelled
    with the statistic's name is added to the right of the last column.
    """
    # key -> (colour-bar label, colour-bar maximum, callable(d, h, b)).
    # NOTE(review): d/h/b look like (values, histogram counts, bin positions)
    # judging by the "num" and "mode" entries; confirm in get_strain_result.
    curve_score_methods = {
        "std": ("Standard Deviation", 2.5, lambda d, h, b: np.std(d)),
        "mean": ("Mean", 4.0, lambda d, h, b: np.mean(d)),
        "cv": ("Coefficient of variation", 1.2,
               lambda d, h, b: scipy.stats.variation(d)),
        "skew": ("modern skew", 3.5, lambda d, h, b: scipy.stats.skew(d)),
        "skew_normed":
        ("Skew", 4.0, lambda d, h, b: scipy.stats.skew(d, bias=False)),
        "mode": ("Mode", 3.5, lambda d, h, b: b[h.argmax()]),
        "num": ("# cells", 2000, lambda d, h, b: len(d)),
        "pearson_mode_mean":
        ("pearson Mode mean", 1.2, pearson_mode_mean_skew),
        "non_parameteric_skew": ("Non parameteric", 0.4, non_parametric_skew),
        "kurtosis": ("Kurtosis", 8.0, lambda d, h, b: scipy.stats.kurtosis(d))
    }

    # Statistics to draw (keys of curve_score_methods), one subplot row each.
    plot_colors = ["std", "skew_normed"]

    basedir = "../../datasets/biofilm_cryoslice/LSM700_63x_sigb"
    cell_df = pd.read_hdf(os.path.join(basedir, "bgsubv2_maxnorm_comp5.h5"),
                          "cells")
    # Keep cells with a positive red value and a distance above 2.
    cell_df = cell_df[cell_df["red_bg_maxnorm"] > 0]
    cell_df = cell_df[cell_df["distance"] > 2]
    time = 48
    location = "center"
    file_df = filedb.get_filedb(os.path.join(basedir, "file_list.tsv"))
    _, des_strain_map = strainmap.load()

    percentile = 0
    gmax = 0.7
    rmax = 3.0
    # (strain key, channel column, maximum value passed to get_strain_result)
    strains = [("wt_sigar_sigby", "red_bg_maxnorm", rmax),
               ("wt_sigar_sigby", "green_bg_maxnorm", gmax),
               ("delqp_sigar_sigby", "green_bg_maxnorm", gmax),
               ("delru_sigar_sigby", "green_bg_maxnorm", gmax),
               ("2xqp_sigar_sigby", "green_bg_maxnorm", gmax)]

    fig, ax = plt.subplots(len(plot_colors),
                           len(strains),
                           sharex=True,
                           sharey=True)
    last_col = len(strains) - 1
    for c, (strain, chan, maxv) in enumerate(strains):
        strain_num = des_strain_map[strain]
        distances, sbins, histograms, stats = get_strain_result(
            file_df, cell_df, time, location, strain_num, chan, maxv,
            percentile, curve_score_methods)
        # The colour depends only on the strain, so look it up once per column.
        color = figure_util.strain_color[strain_num.upper()]
        for r, k in enumerate(plot_colors):
            ax[r, c], _, _ = plot_curves(ax[r, c], color, distances, sbins,
                                         histograms, stats, k)

            if c != last_col:
                continue

            # Colour bar for this row, placed just right of the last column.
            posn = ax[r, c].get_position()
            cbax = fig.add_axes(
                [posn.x0 + posn.width + 0.01, posn.y0, 0.02, posn.height])
            label, max_val = curve_score_methods[k][:2]
            sm = plt.cm.ScalarMappable(cmap=plt.cm.plasma,
                                       norm=plt.Normalize(vmin=0,
                                                          vmax=max_val))
            # The mappable carries no data; it is given an empty array before
            # being passed to colorbar().
            sm._A = []
            plt.colorbar(sm, cax=cbax)
            cbax.set_ylabel(label, rotation=-90, labelpad=8)

    plt.show()

示例#10

0

显示文件

文件： summarise_10x_gradients.py 项目： npmurphy/biofilm_pulse

import os.path

import pandas as pd

import lib.strainmap as strainmap
from lib import filedb

# Module-level setup: load the 10x gradient table and the file database.
# The path is relative to the current working directory and ends with a
# separator, so the plain string concatenations below yield valid paths.
dataset_dir = "datasets/LSM780_10x_sigb/"
gradient_df = pd.read_hdf(dataset_dir + "gradient_data.h5", "data")
# Green over red signal, per row of the gradient table.
gradient_df["ratio"] = gradient_df["green_bg_mean"] / gradient_df["red_bg_mean"]
output_dir = os.path.join(dataset_dir, "gradient_summary")

file_df = filedb.get_filedb(dataset_dir + "filedb.tsv")


# Selection criteria read by get_strain() below.
time = 48.0
location = "center"
strain_map, des_strain_map = strainmap.load()


def get_strain(name):
    """Return the rows of ``gradient_df`` that belong to strain ``name``.

    Files are selected from the module-level ``file_df`` by matching the
    module-level ``time`` and ``location`` and the strain that
    ``des_strain_map`` maps ``name`` to.  The number of matching files and
    their unique names are printed before the filtered frame is returned.
    """
    wanted = (
        (file_df["time"] == time)
        & (file_df["location"] == location)
        & (file_df["strain"] == des_strain_map[name])
    )
    matching_ids = file_df[wanted].index

    print(name, " has ", len(matching_ids))
    print("N=", file_df.loc[matching_ids, "name"].unique())

    return gradient_df[gradient_df["file_id"].isin(matching_ids)]

示例#11

0

显示文件

文件： summarise_live_20x_gradients.py 项目： npmurphy/biofilm_pulse

import numpy as np
#import data.bio_film_data.strainmap as strainmap
import lib.strainmap as strainmap
from lib import filedb

# Module-level setup: load the live 20x gradient table and file database and
# build the cell-type -> strains lookup read by get_strain() below.
#dataset_dir = "datasets/LSM780_10x_sigb/"
dataset_dir = "/media/nmurphy/BF_Data_Orange/datasets/lsm700_live20x_newstrain1"
#gradient_df = pd.read_hdf(dataset_dir + "gradient_data.h5", "data")
gradient_df = pd.read_hdf(
    os.path.join(dataset_dir, "gradient_data_distmap.h5"), "data")
#gradient_df["ratio"] = gradient_df["green_bg_mean"]/gradient_df["red_bg_mean"]
# Green over red raw mean, per row of the gradient table.
gradient_df[
    "ratio"] = gradient_df["green_raw_mean"] / gradient_df["red_raw_mean"]
output_dir = os.path.join(dataset_dir, "gradient_summary")

file_df = filedb.get_filedb(os.path.join(dataset_dir, "file_list.tsv"))

time = 48.0
#strain_map, des_strain_map = strainmap.load()
# The second value returned by load() is replaced below by a mapping that is
# rebuilt from the first.
strain_to_type, type_to_strain = strainmap.load()
# Only the first element of each mapped value is used as the type.
cell_types = np.unique([t[0] for t in strain_to_type.values()])
strain_to_type = {s: t[0] for s, t in strain_to_type.items()}
# Every key initially refers to the SAME empty list object.  That is harmless
# here because the loop below rebinds each entry to a new list (``+`` builds
# a fresh list) instead of appending in place.
type_to_strain = dict(zip(cell_types, [[]] * len(cell_types)))
for strain, typel in strain_to_type.items():
    type_to_strain[typel] = type_to_strain[typel] + [strain]


def get_strain(name):
    fdf = file_df[(file_df["time"] == time)
                  & (file_df["strain"].isin(type_to_strain[name]))]
    fids = fdf.index

示例#12

0

显示文件

文件： figure_spore_counts.py 项目： npmurphy/biofilm_pulse

# fig = plt.figure()
# import matplotlib.gridspec as gridspec
# gs = gridspec.GridSpec(3, 1, height_ratios=[0.4, 0.3, 0.3])
# spimg_ax = plt.subplot(gs[0])
# spcount_ax = plt.subplot(gs[1])
# cellcount_ax = plt.subplot(gs[2])

# Module-level setup for the spore-count figure.
ylabel_cord = (-0.07, 0.5)

# (strain id, label) pairs; this binding is replaced by the plain list of
# strain ids assigned to the same name further down.
sspb_strains = [(st, lib.figure_util.strain_label[st])
                for st in ["JLB077", "JLB117", "JLB118"]]

this_dir = os.path.dirname(__file__)
base = os.path.join(this_dir, "../../datasets/LSM700_63x_sspb_giant/")

file_df = filedb.get_filedb(os.path.join(base, "file_list.tsv"))
# Drop one specific image (name JLB077_48hrs_center_1_1 in Batch1) from the
# file table.
file_df = file_df[~((file_df["name"] == "JLB077_48hrs_center_1_1") &
                    (file_df["dirname"] == "Batch1"))]
# Per-image table, indexed by its "index" column.
individual = pd.read_csv(os.path.join(base, "spore_cell_individual.tsv"),
                         sep="\t",
                         index_col="index")

# From here on sspb_strains holds bare strain ids only.
sspb_strains = ["JLB077", "JLB117", "JLB118"]

# Some images had tiny regions at the end with <10 cells/spores in them
# that produced huge spikes of 100% spores etc.
# To ignore these we use 100 as the minimum sample size.
# 10 does the job; 500 and 100 look good at the top but introduce more artifacts later.
# 100 is just a big enough number.

###########

示例#13

0

显示文件

# Axes for the sigB gradient, the spore gradient and the two image rows.
sbgrad_ax = plt.subplot(outer_gs[0, 1])
spgrad_ax = plt.subplot(outer_gs[0, 0])
wtspr_ax = plt.subplot(picts_gs[0, :])
x2spr_ax = plt.subplot(picts_gs[1, :])

###########
## 10x sigb grad
###########
tenx_basepath = os.path.join(this_dir, "../../datasets/LSM780_10x_sigb/")
tenx_gradient_df = pd.read_hdf(os.path.join(tenx_basepath, "gradient_data.h5"), "data")
# gradient_df["ratio"] = gradient_df["mean_green"]/gradient_df["mean_red"]
print(tenx_gradient_df.columns)
# Green over red signal, per row of the gradient table.
tenx_gradient_df["ratio"] = (
    tenx_gradient_df["green_bg_mean"] / tenx_gradient_df["red_bg_mean"]
)
tenx_file_df = filedb.get_filedb(os.path.join(tenx_basepath, "filedb.tsv"))
sbgrad_ax = subfig_sigb_grad.get_figure(
    sbgrad_ax, tenx_file_df, tenx_gradient_df, ["wt_sigar_sigby", "2xqp_sigar_sigby"]
)
# Raw string: "\s" is not a valid escape sequence in a normal literal.
sbgrad_ax.set_title(r"$\sigma^B$", y=0.98, va="top")
sbgrad_ax.set_ylabel("P$_{sigB}$-YFP / P$_{sigA}$-RFP")
sbgrad_ax.set_xlabel("Distance from biofilm top (μm)")


#######
# spore gradient
# #############
sspb_strains = ["JLB077", "JLB117"]  # , 'JLB118']
spbase = os.path.join(this_dir, "../../datasets/LSM700_63x_sspb_giant/")

spfile_df = filedb.get_filedb(os.path.join(spbase, "file_list.tsv"))

示例#14

0

显示文件

文件： sup_meta_histogram.py 项目： npmurphy/biofilm_pulse

def main():
    """Draw and save the ``sup_meta_histo`` figure.

    One subplot column per entry of ``strains`` and one row per statistic in
    ``plot_colors``.  Data come from
    ``joy_plots_of_gradients.get_strain_result`` and are drawn with
    ``joy_plots_of_gradients.plot_curves``.  A colour bar per row is added
    right of the last column, panels are lettered, shared axis captions are
    added as figure-level annotations and the figure is handed to
    ``figure_util.save_figures``.

    NOTE(review): the bottom caption is attached to ``ax[1, 2]``, so the
    grid must have at least two rows and three columns.
    """
    # key -> (colour-bar label, (colour-bar min, max), callable(d, h, b))
    curve_score_methods = {
        "std":
        ("Standard deviation", (0.0, 1.0), lambda d, h, b: np.std(d)),
        "mean": ("Mean", (0.0, 4.0), lambda d, h, b: np.mean(d)),
        "cv": (
            "Coefficient of variation",
            (0.3, 0.8),
            lambda d, h, b: scipy.stats.variation(d),
        ),
        "skew_normed": (
            "Skew",
            (0.0, 2.9),
            lambda d, h, b: scipy.stats.skew(d, bias=False),
        ),
        "kurtosis":
        ("Kurtosis", (0.0, 8.0), lambda d, h, b: scipy.stats.kurtosis(d)),
    }

    # Statistics to draw (keys of curve_score_methods), one subplot row each.
    plot_colors = [
        "cv",
        "skew_normed",  # same as pandas
    ]

    # Hard-coded data root; also passed to save_figures() at the end.
    this_dir = "/media/nmurphy/BF_Data_Orange/"
    basedir = os.path.join(this_dir, "datasets/LSM700_63x_sigb")
    cell_df = pd.read_hdf(os.path.join(basedir, "single_cell_data.h5"),
                          "cells")
    cell_df = cell_df[cell_df["distance"] > 2]
    time = 48
    location = "center"
    file_df = filedb.get_filedb(os.path.join(basedir, "file_list.tsv"))
    _, des_strain_map = strainmap.load()

    percentile = 0
    green_chan = "green_raw_bg_mean"
    red_chan = "red_raw_bg_mean"
    rmax = 50000
    gmax = 10000
    # (strain key, channel column, x-axis maximum, column title).
    # TeX backslashes are doubled because these labels also contain a real
    # "\n" line break, so they cannot be raw strings; a lone "\m" is not a
    # valid escape sequence.
    strains = [
        ("wt_sigar_sigby", red_chan, rmax, "WT\n P$_{sigA}$-RFP"),
        ("wt_sigar_sigby", green_chan, gmax, "WT\n P$_{\\mathit{sigB}}$-YFP"),
        (
            "delru_sigar_sigby",
            green_chan,
            gmax,
            "Δ$\\mathit{rsbRU}$\n P$_{\\mathit{sigB}}$-YFP",
        ),
        (
            "delqp_sigar_sigby",
            green_chan,
            gmax,
            "Δ$\\mathit{rsbQP}$\n P$_{\\mathit{sigB}}$-YFP",
        ),
    ]

    fig, ax = plt.subplots(len(plot_colors), len(strains), sharey=True)
    last_col = len(strains) - 1
    for c, (strain, chan, max_val, name) in enumerate(strains):
        strain_num = des_strain_map[strain]
        distances, sbins, histograms, stats = joy_plots_of_gradients.get_strain_result(
            file_df,
            cell_df,
            time,
            location,
            strain_num,
            chan,
            max_val,
            percentile,
            curve_score_methods,
        )
        # The colour depends only on the strain, so look it up once per column.
        color = figure_util.strain_color[strain_num.upper()]
        for r, k in enumerate(plot_colors):
            # leglist from the final call is used for the legend below.
            ax[r, c], _, leglist = joy_plots_of_gradients.plot_curves(
                ax[r, c], color, distances, sbins, histograms, stats, k)

            if c == last_col:
                # Colour bar for this row, just right of the last column.
                posn = ax[r, c].get_position()
                cbax = fig.add_axes([
                    posn.x0 + posn.width + 0.0005, posn.y0, 0.015, posn.height
                ])
                label, (min_zval, max_zval) = curve_score_methods[k][:2]
                sm = plt.cm.ScalarMappable(
                    cmap=plt.get_cmap("viridis"),
                    norm=plt.Normalize(vmin=min_zval, vmax=max_zval),
                )
                # The mappable carries no data; it is given an empty array
                # before being passed to colorbar().
                sm._A = []
                plt.colorbar(sm, cax=cbax)
                cbax.set_ylabel(label, rotation=-90, labelpad=8)
                cbax.tick_params(direction="out")

            if r == 0:
                # Top row: column title, no x tick labels.
                ax[r, c].set_title(name, fontsize=6)
                ax[r, c].get_xaxis().set_ticklabels([])

            ax[r, c].set_xlim(0, max_val)

    # Passing explicit labels: letting legend() infer them did not return
    # the right "mode" entry.
    leg = ax[0, -1].legend(leglist, ["Mode", "Mean"],
                           loc="lower left",
                           bbox_to_anchor=(0.84, 0.97))
    leg.set_zorder(400)
    for a in ax.flatten():
        a.tick_params(direction="out")

    # Shared axis captions, positioned in figure coordinates.
    ax[0, 0].annotate(
        "Distance from top of biofilm (μm)",
        xy=(0, 0),
        xytext=(0.02, 0.5),
        textcoords="figure fraction",
        horizontalalignment="center",
        verticalalignment="center",
        fontsize="medium",
        color=mpl.rcParams["axes.labelcolor"],
        rotation=90,
    )
    ax[1, 2].annotate(
        "Normalized fluoresence",
        xy=(0, 0),
        xytext=(0.5, 0.04),
        textcoords="figure fraction",
        horizontalalignment="center",
        verticalalignment="center",
        fontsize="medium",
        color=mpl.rcParams["axes.labelcolor"],
    )

    # Panel letters in row-major order; zip() stops at the shorter sequence.
    letters = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"]
    for a, letter in zip(ax.flatten(), letters):
        a.annotate(
            letter,
            xy=(0, 0),
            xytext=(-0.13, 0.95),
            textcoords="axes fraction",
            horizontalalignment="center",
            verticalalignment="center",
            fontsize=figure_util.letter_font_size,
            color="black",
        )

    filename = "sup_meta_histo"
    width, height = figure_util.get_figsize(figure_util.fig_width_medium_pt,
                                            wf=1.0,
                                            hf=0.6)
    fig.subplots_adjust(left=0.085,
                        right=0.89,
                        top=0.89,
                        bottom=0.13,
                        hspace=0.20,
                        wspace=0.25)
    fig.set_size_inches(width, height)
    figure_util.save_figures(fig, filename, ["png", "pdf"], this_dir)

示例#15

0

显示文件

def main():
    """Assemble and save the 2x2 ``demo_longtail`` figure.

    Three of the four axes hold image panels drawn by
    ``subfig_draw_bin.get_figure`` for the entries of the module-level
    ``image_list``.  The remaining axes (``ax[1, 1]``) holds all histograms
    of ``list_of_histos``, drawn by ``subfig_normalised_histos.get_figure``.
    The cell table is read from the cached ``histo_tops_normed.h5`` unless
    ``generate_data_subset`` is switched on.  The finished figure is handed
    to ``figure_util.save_figures``.

    NOTE(review): assumes ``image_list`` has at most three entries, one per
    image axes; confirm against its definition.
    """

    basedir = os.path.join(this_dir, "../../datasets/LSM700_63x_sigb")

    time = 48
    location = "center"
    # slice_srt, slice_end = 5, 7 #10, 15
    slice_srt_end = (5, 7)

    fig, ax = plt.subplots(2, 2)
    axhisto = ax[1, 1]
    aximage = [ax[0, 0], ax[0, 1], ax[1, 0]]

    # Image panels; only the first one gets a scale bar.
    for i, (name, path, roi, chans) in enumerate(image_list):
        impath = os.path.join(image_base_dir, path)
        aximage[i] = subfig_draw_bin.get_figure(
            aximage[i],
            name,
            impath,
            roi,
            chans,
            FP_max_min,
            slice_srt_end,
            add_scale_bar=i == 0,
        )
        aximage[i].set_title("")
        # Panel letter.
        aximage[i].text(imgletter_lab[0],
                        imgletter_lab[1],
                        topletters[i],
                        transform=aximage[i].transAxes,
                        **letter_settings)  # , color="white")
        # Image name in white near the lower-left corner of the axes.
        aximage[i].text(0.05,
                        0.05,
                        name,
                        transform=aximage[i].transAxes,
                        **label_settings,
                        color="white")

    #####################
    ## Histograms
    generate_data_subset = False

    # Neither returned value is referenced again in this function.
    strain_map, des_strain_map = strainmap.load()

    file_df = filedb.get_filedb(os.path.join(basedir, "file_list.tsv"))
    cachedpath = os.path.join(basedir, "histo_tops_normed.h5")

    gchan = "green_raw_bg_mean"
    rchan = "red_raw_bg_mean"
    nbins = 150
    gmax = 1
    gbins = np.linspace(0, gmax, nbins)

    # (strain key, green channel, red channel, bins, slice, label, colour)
    list_of_histos = [
        # ("2xqp_sigar_sigby",  gchan, rchan, gbins, slice_srt_end, "2xQP", strain_color["JLB095"]),
        (
            "wt_sigar_sigby",
            gchan,
            rchan,
            gbins,
            slice_srt_end,
            r"WT P$_{\mathit{sigB}}$-YFP",
            strain_color["JLB021"],
        ),
        (
            "delru_sigar_sigby",
            gchan,
            rchan,
            gbins,
            slice_srt_end,
            r"Δ$\mathit{rsbRU}$ P$_{\mathit{sigB}}$-YFP",
            strain_color["JLB088"],
        ),
        (
            "delqp_sigar_sigby",
            gchan,
            rchan,
            gbins,
            slice_srt_end,
            r"Δ$\mathit{rsbQP}$ P$_{\mathit{sigB}}$-YFP",
            strain_color["JLB039"],
        ),
    ]
    # Every histogram is drawn on the same axes object.
    axes = [axhisto] * len(list_of_histos)
    if generate_data_subset:
        df = pd.read_hdf(os.path.join(basedir, "single_cell_data.h5"), "cells")
        cellsdf = subfig_normalised_histos.get_data_subset(
            df, file_df, list_of_histos, time, location, cachedpath)
    else:
        cellsdf = pd.read_hdf(cachedpath, "cells")

    axes = subfig_normalised_histos.get_figure(cellsdf, file_df, axes, time,
                                               location, list_of_histos)
    axes[0].legend()

    # Panel letter for the histogram axes.
    axhisto.text(hisletter_lab[0],
                 hisletter_lab[1],
                 letters[0],
                 transform=axhisto.transAxes,
                 **letter_settings)

    axhisto.set_ylabel("Percentage of cells")

    axhisto.set_xlim(0, gmax)
    axhisto.set_ylim(0, 8.5)
    axhisto.tick_params(axis="x", which="both",
                        direction="out")  # , length=2, pad=0)
    axhisto.tick_params(axis="y", which="both",
                        direction="out")  # , length=2, pad=0)
    # Integer-valued y ticks, with nbins=3 handed to the locator.
    axhisto.yaxis.set_major_locator(mticker.MaxNLocator(nbins=3, integer=True))
    axhisto.set_xlabel("Normalised cell fluorecence")

    filename = "demo_longtail"
    fig.subplots_adjust(left=0.05,
                        right=0.95,
                        top=0.99,
                        bottom=0.1,
                        hspace=0.08,
                        wspace=0.20)
    width, height = figure_util.get_figsize(figure_util.fig_width_small_pt,
                                            wf=1.0,
                                            hf=0.9)
    fig.set_size_inches(width, height)
    figure_util.save_figures(fig, filename, ["png", "pdf"], this_dir)

示例#16

0

显示文件

def main():
    """Draw the "sup_bleed_histo" figure: one histogram line per strain.

    Loads the "cells" table from ``single_cell_data.h5`` and the file list
    from ``file_list.tsv`` in the LSM700_63x_sigb dataset directory (resolved
    relative to this script). For each entry in the strain list it selects
    the cells whose ``global_file_id`` belongs to a file matching the chosen
    time, location and strain, and passes them to
    ``subfig_indivfile_histo.get_figure`` so that every strain is drawn on
    the same axis. The figure is then labelled, sized and handed to
    ``figure_util.save_figures`` with the "png" and "pdf" formats.
    """
    this_dir = os.path.dirname(__file__)
    basedir = os.path.join(this_dir, "../../datasets/LSM700_63x_sigb")

    cell_df = pd.read_hdf(os.path.join(basedir, "single_cell_data.h5"), "cells")
    print(cell_df.columns)

    file_df = filedb.get_filedb(os.path.join(basedir, "file_list.tsv"))
    # Relabel files recorded at 26 and 38 as 24 and 36
    # (presumably to line them up with nominal time points -- confirm).
    for recorded, relabelled in ((26.0, 24.0), (38.0, 36.0)):
        file_df.loc[file_df["time"] == recorded, ["time"]] = relabelled

    # Selection of the data set to plot.
    time = 48
    location = "center"
    slice_range = (5, 7)
    percentile = 0
    use_cached_plots = False

    # Channel and histogram binning, given as (start, stop, number of bins).
    gchan = "green_raw_bg_autofluor_bleedthrough_meannorm"
    gmax_val = 20
    gbins = (0, gmax_val, 150)

    fig, axhisto = plt.subplots(1, 1)
    _, des_strain_map = strainmap.load()

    # (strain key, channel, legend label, line colour)
    list_of_histos = [
        ("wt_sigar_sigby", gchan, "WT P$_{sigB}$-YFP", strain_color["JLB021"]),
        ("delqp_sigar_sigby", gchan, "ΔrsbQP P$_{sigB}$-YFP", strain_color["JLB039"]),
        ("delru_sigar_sigby", gchan, "ΔrsbRU P$_{sigB}$-YFP", strain_color["JLB088"]),
        ("2xqp_sigar_sigby", gchan, "2$\\times$rsbQP P$_{sigB}$-YFP", strain_color["JLB095"]),
    ]

    print("-----------")
    legend_lines = []
    legend_labels = []
    for strain, chan, label, color in list_of_histos:
        print(label)
        wanted = (
            (file_df["time"] == time)
            & (file_df["location"] == location)
            & (file_df["strain"] == des_strain_map[strain])
        )
        fids = file_df[wanted].index
        strain_df = cell_df[cell_df["global_file_id"].isin(fids)]

        # get_figure hands back the axis it drew on plus the plotted line;
        # the line is kept so the legend can be built explicitly below.
        axhisto, line, _ = subfig_indivfile_histo.get_figure(
            axhisto,
            strain_df,
            chan,
            gbins,
            slice_range,
            (time, location, strain),
            percentile,
            use_cached_plots,
            this_dir,
            {"color": color, "max_min": "none", "mode_mean": False},
        )
        legend_lines.append(line)
        legend_labels.append(label)

    axhisto.legend(legend_lines, legend_labels)
    axhisto.set_xlabel("Normalised cell fluorecence (bleed through subtracted)")
    axhisto.set_ylabel("Percentage of cells")
    axhisto.set_ylim(0, 7)
    axhisto.set_xlim(0, gmax_val)

    filename = "sup_bleed_histo"
    fig.subplots_adjust(left=0.1, right=0.9, top=0.98, bottom=0.2)
    width, height = figure_util.get_figsize(
        figure_util.fig_width_small_pt, wf=1.0, hf=0.5)
    fig.set_size_inches(width, height)

    figure_util.save_figures(fig, filename, ["png", "pdf"], this_dir)

示例#17

0

显示文件

文件： giant63_simple_agregate.py 项目： npmurphy/biofilm_pulse

import os.path
from glob import glob

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--files', nargs='+')
    parser.add_argument('-db', '--filedb')
    parser.add_argument('--bad_db')
    parser.add_argument('-o', '--output', default="output.h5")
    parser.add_argument('-a', '--append', action="store_true", default=False)
    parser.add_argument('--remove_from_path', default="")
    parser.add_argument('--data')  # "spores or "cells"

    pa = parser.parse_args()

    file_df = filedb.get_filedb(pa.filedb)
    if pa.bad_db:
        bad_df = filedb.get_filedb(pa.bad_db)
    else:
        bad_df = None

    print(bad_df)

    if len(pa.files) == 1 and "*" in pa.files[0]:
        pa.files = glob(pa.files[0], recursive=True)
    pa.files = [f for f in pa.files if f[-4:] == ".tsv"]

    def read_tsv(path):
        try:
            df = pd.read_csv(path, sep="\t")
            #basename = os.path.splitext(os.path.basename(path))[0]