def RunCellAssign(sce, annot_sce):
    """Run CellAssign on ``sce`` and copy the annotated result to ``annot_sce``.

    Args:
        sce: Path of the input file handed to ``CellAssign.run``. All
            intermediate files are placed in the same directory.
        annot_sce: Destination path that receives a copy of the
            ``sce_cas.rdata`` file found next to ``sce``.
    """
    workdir = os.path.split(sce)[0]
    rho_csv = os.path.join(workdir, "rho_csv_sub.csv")
    fit = os.path.join(workdir, "fit_sub.pkl")
    # NOTE: an unused sample id used to be derived with sce.split("/")[-2];
    # it raised IndexError for paths without a parent component, so it was
    # dropped.
    CellAssign.run(sce, config.rho_matrix, fit, rho_csv=rho_csv)
    # sce_cas.rdata is assumed to be written into workdir by CellAssign.run
    # (TODO confirm against CellAssign).
    filtered_sce = os.path.join(workdir, "sce_cas.rdata")
    shutil.copyfile(filtered_sce, annot_sce)
def Run(sampleid, before, finished):
    """Download a sample, run QC and CellAssign, then write a completion marker.

    Args:
        sampleid: Identifier passed to ``TenxDataStorage`` and
            ``QualityControl``; also names the ``.cache/<sampleid>`` folder
            used for the CellAssign output path.
        before: Not used by this function (presumably a workflow ordering
            dependency -- confirm against the caller).
        finished: Path of the marker file; it is overwritten with the text
            ``Completed`` once every step has returned.
    """
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    tenx_analysis = TenxAnalysis(tenx.tenx_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)
    celltypes = ".cache/{}/celltypes.rdata".format(sampleid)
    CellAssign.run(qc.sce, config.rho_matrix, celltypes)
    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
def RunCellAssign(custom_output, sce, annot_sce):
    """Run CellAssign for one sample, caching the result under the job path.

    Args:
        custom_output: Path to a JSON file holding a mapping; the key of its
            last item is used as the sample id.
        sce: Path of the input file handed to ``CellAssign.run``.
            Intermediate files are placed in the same directory.
        annot_sce: Destination path that receives a copy of the cached
            annotated result.
    """
    with open(custom_output, "r") as handle:
        sample = json.load(handle)
    # Only the key (sample id) of the last mapping entry is needed.
    sampleid, _ = list(sample.items()).pop()

    workdir = os.path.split(sce)[0]
    rho_csv = os.path.join(workdir, "rho_csv_sub.csv")
    fit = os.path.join(workdir, "fit_sub.pkl")
    local_result = os.path.join(workdir, "sce_cas.rdata")
    cached_result = os.path.join(
        config.jobpath, "results", "sce_cas_{}.rdata".format(sampleid))

    # CellAssign only runs when no cached result exists for this sample.
    if not os.path.exists(cached_result):
        rho = os.path.join(config.jobpath, config.rho_matrix)
        CellAssign.run(sce, rho, fit, rho_csv=rho_csv, lsf=True)
        shutil.copyfile(local_result, cached_result)
    shutil.copyfile(cached_result, annot_sce)
def RunHRD(custom_input, sce, rdata, umap):
    """Run the HRD CellAssign model for one sample and render a UMAP plot.

    Args:
        custom_input: Path to a JSON file holding a mapping; the key of its
            last item is used as the sample id.
        sce: Path of the input file handed to ``CellAssign.run``.
            Intermediate files and the generated R script are placed in the
            same directory.
        rdata: Destination path that receives a copy of the cached result.
        umap: Destination path that receives a copy of the cached UMAP PNG.
    """
    with open(custom_input, "r") as handle:
        sample = json.load(handle)
    # Only the key (sample id) of the last mapping entry is needed.
    sampleid, _ = list(sample.items()).pop()

    results_dir = os.path.join(config.jobpath, "results")
    rdata_cached = os.path.join(results_dir, "hrd_{}.rdata".format(sampleid))
    umap_cached = os.path.join(
        results_dir, "hrd_umap_{}.png".format(sampleid))

    workdir = os.path.split(sce)[0]
    rho_csv = os.path.join(workdir, "hrd_rho_csv.csv")
    fit = os.path.join(workdir, "fit_sub_hrd.pkl")
    filtered_sce = os.path.join(workdir, "hrd_sce_cas.rdata")

    # CellAssign only runs when no cached result exists for this sample.
    if not os.path.exists(rdata_cached):
        rho = "/codebase/hrd.yaml"
        CellAssign.run(sce, rho, fit, rho_csv=rho_csv, lsf=True,
                       script_prefix="hrd_")
        shutil.copyfile(filtered_sce, rdata_cached)

    # R template: the script reads the fit and the cached result, draws the
    # UMAP and saves the object back over the cached file.
    rcode = """
    library(Seurat)
    library(ggplot2)
    fit <- readRDS("{fit}")
    sce <- readRDS("{sce}")
    sce$repairtype <- fit$cell_type
    sce$HRD_prob <- fit$mle_params$gamma[,"HRD"]
    sce$HR_prob <- fit$mle_params$gamma[,"HR"]
    seurat <- as.Seurat(sce, counts = "counts", data = "logcounts")
    seurat$repairtype <- sce$repairtype
    png("{umap}", width=1000, height=1000)
    DimPlot(object = seurat, reduction = "UMAP", group.by = "repairtype", dark.theme=TRUE, plot.title="{sample}")
    dev.off()
    saveRDS(sce, file="{sce}")
    """
    qc_script = os.path.join(workdir, "hrd_viz_{}.R".format(sampleid))
    with open(qc_script, "w") as output:
        output.write(
            rcode.format(sce=rdata_cached, umap=umap_cached,
                         sample=sampleid, fit=fit))
    # NOTE(review): the Rscript exit status is ignored; a failed run only
    # surfaces when the UMAP copy below cannot find its source.
    subprocess.call(["Rscript", "{}".format(qc_script)])

    shutil.copyfile(rdata_cached, rdata)
    shutil.copyfile(umap_cached, umap)
def RunCellAssign(sce, annot_sce):
    """Run CellAssign unless already done, then export the result and the fit.

    Args:
        sce: Path of the input file handed to ``CellAssign.run``.
            ``sce_cas.rdata`` and ``fit.rdata`` are looked up in the same
            directory.
        annot_sce: Destination path that receives a copy of ``sce_cas.rdata``.

    Side effects:
        Copies ``fit.rdata`` into the current working directory.
    """
    workdir = os.path.split(sce)[0]
    filtered_sce = os.path.join(workdir, "sce_cas.rdata")
    rho_csv = os.path.join(workdir, "rho_csv_sub.csv")
    fit = os.path.join(workdir, "fit.rdata")
    # NOTE: an unused sample id used to be derived with sce.split("/")[-2];
    # it raised IndexError when sce had no parent component, so it was
    # dropped.

    # An existing sce_cas.rdata is treated as a finished run.
    if not os.path.exists(filtered_sce):
        CellAssign.run(sce, config.rho_matrix, fit, rho_csv=rho_csv,
                       lsf=False)
    shutil.copyfile(filtered_sce, annot_sce)

    exported_fit = os.path.join(os.getcwd(), "fit.rdata")
    try:
        shutil.copyfile(fit, exported_fit)
    except shutil.SameFileError:
        # sce already lives in the working directory, so the fit is in place.
        pass
def RunCellAssign(sce, annot_sce, cellfit):
    """Run CellAssign with a sample-specific rho matrix and export results.

    Args:
        sce: Path of the input file handed to ``CellAssign.run``. The name
            of its parent directory is used as the sample id.
        annot_sce: Destination path that receives a copy of ``sce_cas.rdata``.
        cellfit: Destination path that receives a copy of ``cell_types.pkl``.
    """
    workdir = os.path.split(sce)[0]
    rho_csv = os.path.join(workdir, "rho_csv_sub.csv")
    fit = os.path.join(workdir, "fit_sub.pkl")
    filtered_sce = os.path.join(workdir, "sce_cas.rdata")
    # Built from the directory rather than with str.replace on the full
    # path, which also rewrote any directory name containing "fit_sub".
    cell_types = os.path.join(workdir, "cell_types.pkl")

    # Both outputs must exist for the run to be skipped.
    if not (os.path.exists(filtered_sce) and os.path.exists(fit)):
        # The sample id is only needed to pick the rho matrix.
        sampleid = os.path.basename(workdir)
        if "CD45N" in sampleid:
            rho = config.negative_rho_matrix
        elif "CD45P" in sampleid:
            rho = config.positive_rho_matrix
        else:
            rho = config.rho_matrix
        CellAssign.run(sce, rho, fit, rho_csv=rho_csv)

    shutil.copyfile(filtered_sce, annot_sce)
    shutil.copyfile(cell_types, cellfit)
def RunExhaustion(custom_input, sce, rdata, umap):
    """Run the exhaustion CellAssign model for one sample and plot a UMAP.

    Args:
        custom_input: Path to a JSON file holding a mapping; the key of its
            last item is used as the sample id.
        sce: Path of the input file handed to ``CellAssign.run``.
            Intermediate files and the generated R script are placed in the
            same directory.
        rdata: Destination path that receives a copy of the cached result;
            the R script reads this copy.
        umap: Destination path that receives a copy of the cached UMAP PNG.
    """
    with open(custom_input, "r") as handle:
        sample = json.load(handle)
    # Only the key (sample id) of the last mapping entry is needed.
    sampleid, _ = list(sample.items()).pop()

    results_dir = os.path.join(config.jobpath, "results")
    rdata_cached = os.path.join(
        results_dir, "exhaustion_{}.rdata".format(sampleid))
    umap_cached = os.path.join(
        results_dir, "exhaustion_umap_{}.png".format(sampleid))

    workdir = os.path.split(sce)[0]
    rho_csv = os.path.join(workdir, "exhaustion_rho_csv.csv")
    fit = os.path.join(workdir, "fit_sub_exhaustion.pkl")
    filtered_sce = os.path.join(workdir, "exhaustion_sce_cas.rdata")

    # CellAssign only runs when no cached result exists for this sample.
    if not os.path.exists(rdata_cached):
        rho = "/codebase/exhaustion.yaml"
        CellAssign.run(sce, rho, fit, rho_csv=rho_csv, lsf=True,
                       script_prefix="exhaustion_")
        shutil.copyfile(filtered_sce, rdata_cached)
    shutil.copyfile(rdata_cached, rdata)

    # R template: the script reads the exported result and the fit, then
    # draws a feature plot of the exhaustion probability.
    rcode = """
    library(Seurat)
    library(ggplot2)
    sce <- readRDS("{sce}")
    fit <- readRDS("{fit}")
    sce$Exhaustion_prob <- fit$mle_params$gamma[,"Exhausted.T.cell"]
    seurat <- as.Seurat(sce, counts = "counts", data = "logcounts")
    seurat$Exhaustion_prob <- sce$Exhaustion_prob
    png("{umap}", width=1000, height=1000)
    FeaturePlot(object = seurat, reduction = "UMAP", features=c("Exhaustion_prob"))
    dev.off()
    """
    qc_script = os.path.join(workdir, "exhaustion_viz_{}.R".format(sampleid))
    with open(qc_script, "w") as output:
        output.write(
            rcode.format(sce=rdata, umap=umap_cached, sample=sampleid,
                         fit=fit))
    # NOTE(review): the Rscript exit status is ignored; a failed run only
    # surfaces when the UMAP copy below cannot find its source.
    subprocess.call(["Rscript", "{}".format(qc_script)])

    shutil.copyfile(umap_cached, umap)
def Run(sampleid, before, finished, use_corrected=False):
    """Prepare an input for a sample, run CellAssign once, write a marker.

    Args:
        sampleid: Identifier passed to ``TenxDataStorage`` and
            ``QualityControl``; also names the ``.cache/<sampleid>`` folder
            used for the CellAssign output path.
        before: Not used by this function (presumably a workflow ordering
            dependency -- confirm against the caller).
        finished: Path of the marker file; it is overwritten with the text
            ``Completed`` at the end.
        use_corrected: When true and ``.cache/corrected/`` exists, use
            ``.cache/corrected/corrected_sce.rdata`` as input instead of
            downloading and running QC on the sample.
    """
    corrected_dir = ".cache/corrected/"
    corrected_sce = ".cache/corrected/corrected_sce.rdata"
    celltypes = ".cache/{}/celltypes.rdata".format(sampleid)

    if use_corrected and os.path.exists(corrected_dir):
        sce = corrected_sce
        # Build the corrected input from the directory only when missing.
        if not os.path.exists(sce):
            utils = DropletUtils()
            utils.read10xCounts(corrected_dir, corrected_sce)
    else:
        tenx = TenxDataStorage(sampleid, version="v3")
        tenx.download()
        tenx_analysis = TenxAnalysis(tenx.tenx_path)
        tenx_analysis.load()
        tenx_analysis.extract()
        qc = QualityControl(tenx_analysis, sampleid)
        sce = qc.sce

    # An existing celltypes.rdata is treated as a finished CellAssign run.
    if not os.path.exists(celltypes):
        CellAssign.run(sce, config.rho_matrix, celltypes)

    # Context manager guarantees the marker is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
def Run(sampleid, raw_sce, sce_cas, rho, B, min_delta):
    """Run CellAssign next to ``raw_sce`` if needed and export ``sce_cas.rdata``.

    Args:
        sampleid: Not used by this function.
        raw_sce: Path of the input file handed to ``CellAssign.run``;
            its directory holds ``celltypes.rdata`` and ``sce_cas.rdata``.
        sce_cas: Destination path that receives a copy of ``sce_cas.rdata``.
        rho: Forwarded to ``CellAssign.run`` as its second argument.
        B: Forwarded positionally to ``CellAssign.run``.
        min_delta: Forwarded positionally to ``CellAssign.run``.
    """
    workdir = os.path.dirname(raw_sce)
    fit_path = os.path.join(workdir, "celltypes.rdata")

    # An existing celltypes.rdata is treated as a finished run.
    already_fitted = os.path.exists(fit_path)
    if not already_fitted:
        CellAssign.run(raw_sce, rho, fit_path, B, min_delta)

    annotated = os.path.join(workdir, "sce_cas.rdata")
    shutil.copyfile(annotated, sce_cas)