Пример #1
0
def diffexp(branch: "CellBranch", run_name: str):
    """Run the R differential-expression script for one run of ``branch``.

    The two groups compared are the distinct values of the
    ``partition_code`` metadata column: the smaller value is passed to the
    script as the first identity and the larger as the second.

    Args:
        branch: Branch supplying the spec, the metadata and the root dir.
        run_name: Key of the run in both ``branch`` and ``branch.spec``.

    Raises:
        ValueError: If ``partition_code`` does not contain exactly two
            distinct values.
    """
    # TODO: refactor both diffexp versions into `_get_diffexp_args`
    partition_column = "partition_code"

    meta_path = branch.get_temp_meta_path(run_name)
    run_params = branch.spec[run_name].params
    current_run = branch[run_name]
    result_path = current_run.path_map["diffexp_result"]
    branch_root = str(branch.root)
    shell_spec = branch.spec.shell_str

    # Cast to object dtype so the values are plain Python objects when
    # they are formatted into the error message / script arguments.
    codes = current_run.branch.meta[partition_column].unique().astype("O")
    if len(codes) != 2:
        raise ValueError(
            f"Exactly two groups required for diffexp. Got: {codes}")
    first_ident, second_ident = codes.min(), codes.max()

    script_args = [
        meta_path,
        result_path,
        branch_root,
        shell_spec,
        run_params["test"],
        run_params["logfc_thresh"],
        first_ident,
        second_ident,
        partition_column,
        N_CPUS,
        # R_FUNCTIONS_FILEPATH,
    ]
    run_r_script_logged(branch, R_DIFFEXP_SCRIPT, script_args, run_name)
Пример #2
0
def reduce(branch: "CellBranch", run_name: str):
    """Run the PCA R script, then UMAP on the PCA embeddings.

    The R script receives the temp metadata path, the prior run's
    ``rna_r`` path and the three PCA output paths. The table returned by
    ``run_umap`` is re-indexed with ``branch.meta.index`` and written as a
    tab-separated file to the run's ``meta`` path.

    Args:
        branch: Branch supplying the spec and the metadata index.
        run_name: Key of the run in both ``branch`` and ``branch.spec``.
    """
    meta_path = branch.get_temp_meta_path(run_name)
    run_params = branch.spec[run_name].params
    current_run = branch[run_name]

    rds_path = current_run.path_map_prior["rna_r"]
    embeddings_path = current_run.path_map["pca_embeddings"]
    loadings_path = current_run.path_map["pca_loadings"]
    stdev_path = current_run.path_map["pca_stdev"]
    n_pcs = run_params["pca_npcs"]

    # r_functions_filepath = R_FUNCTIONS_FILEPATH
    pca_args = [
        meta_path,
        rds_path,
        embeddings_path,
        loadings_path,
        stdev_path,
        n_pcs,
        # r_functions_filepath,
    ]
    run_r_script_logged(branch, R_PCA_SCRIPT, pca_args, run_name)

    # UMAP consumes the embeddings file the R script has just produced.
    umap_options = {
        "n_neighbors": run_params["umap_n_neighbors"],
        "min_dist": run_params["umap_min_dist"],
        "n_components": run_params["umap_n_components"],
        "metric": run_params["umap_metric"],
    }
    umap_meta = run_umap(embeddings_path, **umap_options)
    umap_meta.index = branch.meta.index
    umap_meta.to_csv(current_run.path_map["meta"], sep="\t")
Пример #3
0
def markers(branch: "CellBranch", run_name: str):
    """Run the R marker-gene script for one run of ``branch``.

    Before launching the script, every cluster in the ``cluster_id``
    metadata column is checked for a minimum cell count.

    Args:
        branch: Branch supplying the spec, the metadata and the root dir.
        run_name: Key of the run in both ``branch`` and ``branch.spec``.

    Raises:
        ValueError: If any cluster has fewer than three cells.
    """
    # The error message has always promised a three-cell minimum (which is
    # also Seurat's default `min.cells.group`), but the check used to be
    # `< 2`, letting two-cell clusters through. Keep check and message in
    # sync through a single constant.
    min_cells_per_cluster = 3

    input_metadata_path = branch.get_temp_meta_path(run_name)
    params = branch.spec[run_name].params
    process_run = branch[run_name]
    output_markers_path = process_run.path
    root_dir = str(branch.root)
    spec_str = branch.spec.shell_str

    cluster_counts = branch.meta["cluster_id"].value_counts()
    too_small = cluster_counts < min_cells_per_cluster
    deficient_clusters = cluster_counts[too_small].index.tolist()
    if deficient_clusters:
        raise ValueError(
            f"Deficient clusters {deficient_clusters} with fewer than "
            f"{min_cells_per_cluster} "
            f"cells. Adjust clustering parameters.\n{cluster_counts}")

    arg_list = [
        input_metadata_path,
        output_markers_path,
        root_dir,
        spec_str,
        params["logfc_thresh"],
        params["test"],
        N_CPUS,
        # R_FUNCTIONS_FILEPATH,
    ]
    run_r_script_logged(branch, R_MARKERS_SCRIPT, arg_list, run_name)
Пример #4
0
def normalize(branch: "CellBranch", run_name: str):
    """Filter cells and genes, then normalize with the configured method.

    Performs:
        - cell filtering by `min_genes`, `max_genes`, and `perc_mito_cutoff`
        - gene filtering by `min_cells` expressing
        - normalization via either:
            - seurat default
            - sctransform

    Params (read from ``branch.spec[run_name].params``):
        min_genes (int):
        max_genes (int):
        min_cells (int):
        perc_mito_cutoff (int, float):
        method (str): from {"seurat_default", "sctransform"}
        nfeatures (int): (seurat_default only)

    Raises:
        ValueError: If ``method`` is neither "sctransform" nor
            "seurat_default".
    """
    meta_path = branch.get_temp_meta_path(run_name)
    # TODO: add a root filepaths lookup
    run_params = branch.spec[run_name].params
    current_run = branch[run_name]

    # Arguments shared by both normalization scripts.
    script_args = [
        meta_path,
        current_run.path_map_prior["rna_r"],
        current_run.path_map["rna_r"],
        run_params["min_genes"],
        run_params["max_genes"],
        run_params["min_cells"],
        run_params["perc_mito_cutoff"],
        # R_FUNCTIONS_FILEPATH,
    ]

    method = run_params["method"]
    if method not in ("sctransform", "seurat_default"):
        raise ValueError(
            f"Invalid normalization method: {method}. Use 'sctransform' or 'seurat_default'"
        )

    if method == "sctransform":
        # sctransform additionally writes two output files.
        script_args.extend([
            current_run.path_map["corrected_umi"],
            current_run.path_map["pearson_residual"],
        ])
        script = R_SCTRANSFORM_SCRIPT
    else:
        # The verbose flag is always on, passed as the upper-cased
        # string form of True.
        script_args.extend(["TRUE", run_params["nfeatures"]])
        script = R_SEURAT_DEFAULT_NORM_SCRIPT

    run_r_script_logged(branch, script, script_args, run_name)
Пример #5
0
def cluster(branch: "CellBranch", run_name: str):
    """Run the R clustering script for one run of ``branch``.

    The script is given the temp metadata path, the run's ``meta`` output
    path, the branch root, the spec's shell string and the ``num_pcs``,
    ``res`` and ``eps`` run parameters, in that order.

    Args:
        branch: Branch supplying the spec and the root directory.
        run_name: Key of the run in both ``branch`` and ``branch.spec``.
    """
    meta_path = branch.get_temp_meta_path(run_name)
    # TODO: add a root filepaths lookup
    run_params = branch.spec[run_name].params
    clusters_path = branch[run_name].path_map["meta"]
    # TODO: may actually need way to pass spec through
    branch_root = str(branch.root)
    shell_spec = branch.spec.shell_str

    script_args = [meta_path, clusters_path, branch_root, shell_spec]
    script_args += [
        run_params["num_pcs"],
        run_params["res"],
        run_params["eps"],
        # R_FUNCTIONS_FILEPATH,
    ]
    run_r_script_logged(branch, R_CLUSTER_SCRIPT, script_args, run_name)
Пример #6
0
def plot_cell_cycle_scoring_scat(branch: "CellBranch", r_script: str,
                                 args: list, **kwargs):
    """Run ``r_script`` with ``args`` under a fixed run name.

    Extra keyword arguments are accepted and ignored.
    """
    run_name = "plot_cell_cycle_scoring_scat"
    run_r_script_logged(branch, r_script, args, run_name)
Пример #7
0
def plot_umap_embeddings_scat_r(branch: "CellBranch", r_script: str,
                                args: list, **kwargs):
    """Run ``r_script`` with ``args`` under a fixed run name.

    Extra keyword arguments are accepted and ignored.
    """
    # NOTE(review): the run name omits this function's `_r` suffix --
    # confirm this is the intended key.
    run_name = "plot_umap_embeddings_scat"
    run_r_script_logged(branch, r_script, args, run_name)
Пример #8
0
def plot_pca_loadings_scat(branch: "CellBranch", r_script: str, args: list,
                           **kwargs):
    """Run ``r_script`` with ``args`` under a fixed run name.

    Extra keyword arguments are accepted and ignored.
    """
    run_name = "plot_pca_loadings_scat"
    run_r_script_logged(branch, r_script, args, run_name)
Пример #9
0
def plot_pca_elbow_curv(branch: "CellBranch", r_script: str, args: list,
                        **kwargs):
    """Run ``r_script`` with ``args`` under a fixed run name.

    Extra keyword arguments are accepted and ignored.
    """
    run_name = "plot_pca_elbow_curv"
    run_r_script_logged(branch, r_script, args, run_name)
Пример #10
0
def plot_perc_hsp_per_cell_vln_r(branch: "CellBranch", r_script: str,
                                 args: list, **kwargs):
    """Run ``r_script`` with ``args`` under a fixed run name.

    Extra keyword arguments are accepted and ignored.
    """
    run_name = "plot_perc_hsp_per_cell_vln_r"
    run_r_script_logged(branch, r_script, args, run_name)
Пример #11
0
def plot_umis_vs_perc_ribo_scat_r(branch: "CellBranch", r_script: str,
                                  args: list, **kwargs):
    """Run ``r_script`` with ``args`` under a fixed run name.

    Extra keyword arguments are accepted and ignored.
    """
    # NOTE(review): the run name omits this function's `_r` suffix --
    # confirm this is the intended key.
    run_name = "plot_umis_vs_perc_ribo_scat"
    run_r_script_logged(branch, r_script, args, run_name)
Пример #12
0
def plot_marker_genes_per_cluster_bar(branch: "CellBranch", r_script: str,
                                      args: list, **kwargs):
    """Run ``r_script`` with ``args`` plus the branch's current path.

    ``branch.current_path`` is passed to the script as one extra trailing
    argument. Extra keyword arguments are accepted and ignored.

    Args:
        branch: Branch whose ``current_path`` is appended to the arguments.
        r_script: R script handed to ``run_r_script_logged``.
        args: Base script arguments; this list is left unmodified.
    """
    # Build a new list instead of appending in place: mutating the
    # caller's list would make a reused `args` accumulate one extra path
    # per call.
    script_args = [*args, branch.current_path]
    run_r_script_logged(branch, r_script, script_args,
                        "plot_marker_genes_per_cluster_bar")
Пример #13
0
def plot_umis_per_barcode_rank_curv(branch: "CellBranch", r_script: str,
                                    args: list, **kwargs):
    """Run ``r_script`` with ``args`` under the branch's current process.

    The run name is read from ``branch.current_process`` rather than being
    hard-coded. Extra keyword arguments are accepted and ignored.
    """
    run_name = branch.current_process
    run_r_script_logged(branch, r_script, args, run_name)
Пример #14
0
def plot_highest_exprs_dens(branch: "CellBranch", r_script: str, args: list,
                            **kwargs):
    """Run ``r_script`` with ``args`` under a fixed run name.

    Extra keyword arguments are accepted and ignored.
    """
    run_name = "plot_highest_exprs_dens"
    run_r_script_logged(branch, r_script, args, run_name)