示例#1
0
def plot_by_genes(rdata, tenx_analysis, genes, prefix, rep, pcs):
    """Save one expression-coloured scatter plot per gene.

    Cells are positioned using the reduced dimensions ``rep`` stored in the
    SingleCellExperiment loaded from ``rdata`` and coloured by the value
    found in the scanpy AnnData built from ``tenx_analysis``.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        tenx_analysis: Argument handed to ``TenxAnalysis``.
        genes: Iterable of gene identifiers; one PNG is written per entry.
        prefix: Unused; kept for interface compatibility.
        rep: Name of the reduced-dimension representation to plot.
        pcs: Unused; kept for interface compatibility.

    Side effects:
        Creates ``figures/expression`` if needed and writes
        ``figures/expression/expression_<gene>.png`` for every gene.
    """
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    sce = SingleCellExperiment.fromRData(rdata)
    tsne_dims = sce.getReducedDims(rep)
    barcodes = sce.colData["Barcode"]
    transcripts = sce.rowData["Symbol"]
    adata = tenx.create_scanpy_adata(barcodes=barcodes,
                                     transcripts=transcripts)
    x_coded = dict(zip(barcodes, tsne_dims[0]))
    y_coded = dict(zip(barcodes, tsne_dims[1]))
    # exist_ok avoids the check-then-create race of the previous version.
    os.makedirs("figures/expression", exist_ok=True)
    x = [x_coded[barcode] for barcode in barcodes]
    y = [y_coded[barcode] for barcode in barcodes]
    for gene in genes:
        expression = [float(adata[barcode, gene].X) for barcode in barcodes]
        f, ax = plt.subplots(figsize=(10, 8))
        sns.scatterplot(x=x, y=y, hue=expression, alpha=0.85)
        ax.set_title("{} Counts".format(gene))
        ax.legend()
        plt.tight_layout()
        plt.savefig("figures/expression/expression_{}.png".format(gene))
        # Release the figure; otherwise one open figure per gene accumulates.
        plt.close(f)
示例#2
0
def umap_by_gene(rdata, gene, prefix, pcs, tenx_analysis=None):
    """Save a UMAP scatter plot coloured by the expression of one gene.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        gene: Gene identifier used to index the AnnData columns and to name
            the output file.
        prefix: Text appended to the plot title.
        pcs: Unused; kept for interface compatibility.
        tenx_analysis: Argument handed to ``TenxAnalysis``. The previous
            version referenced this name without defining it, so it is now
            an optional trailing parameter that must be supplied.

    Raises:
        ValueError: If ``tenx_analysis`` is not provided.

    Side effects:
        Writes ``figures/umap_by_<gene>.png``.
    """
    if tenx_analysis is None:
        raise ValueError("tenx_analysis must be provided.")
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    sce = SingleCellExperiment.fromRData(rdata)
    tsne_dims = sce.reducedDims["UMAP"]
    barcodes = sce.colData["Barcode"]
    transcripts = sce.rowData["Symbol"]
    # The original passed the undefined name `symbols` here.
    adata = tenx.create_scanpy_adata(barcodes=barcodes,
                                     transcripts=transcripts)
    assert len(barcodes) == len(adata[:, gene])
    expression = dict(zip(barcodes, adata[:, gene]))
    tsne_dims = numpy.array(tsne_dims).reshape(2, len(barcodes))
    x_coded = dict(zip(barcodes, tsne_dims[0]))
    y_coded = dict(zip(barcodes, tsne_dims[1]))
    x = []
    y = []
    clusters = []
    for barcode in barcodes:
        clusters.append(float(expression[barcode]))
        x.append(x_coded[barcode])
        y.append(y_coded[barcode])
    f, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(x=x, y=y, hue=clusters, alpha=0.85)
    # NOTE(review): the title says "PCA - Clusters" although the data plotted
    # is UMAP coordinates coloured by gene expression -- confirm wording.
    ax.set_title("PCA - Clusters - {}".format(prefix))
    ax.legend()
    plt.tight_layout()
    plt.savefig("figures/umap_by_{}.png".format(gene))
    # Release the figure once it has been written to disk.
    plt.close(f)
示例#3
0
def upload_tenx(sampleid, before, finished):
    """Finalize the cellranger output of a sample and upload it.

    Args:
        sampleid: Sample identifier; output is read from ``./<sampleid>/outs/``.
        before: Unused; kept for interface compatibility.
        finished: Path of the marker file written once the upload returns.
    """
    print("Calling upload.")
    tenx = TenxAnalysis("./{}/outs/".format(sampleid))
    tenx.finalize()
    tenxds = TenxDataStorage(sampleid)
    tenxds.upload_cellranger(tenx)
    # Context manager guarantees the marker file is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
示例#4
0
def Run(sampleid, finished):
    """Run cellranger count for a sample and upload the result.

    The count/finalize/upload steps are skipped when a file named
    ``cellranger.complete`` exists in the working directory; the marker file
    is written in either case.

    Args:
        sampleid: Sample identifier; output is read from ``./<sampleid>/outs/``.
        finished: Path of the marker file written at the end.
    """
    if not os.path.exists("cellranger.complete"):
        CellRanger.count([sampleid])
        tenx = TenxAnalysis("./{}/outs/".format(sampleid))
        tenx.finalize()
        tenxds = TenxDataStorage(sampleid)
        tenxds.upload_cellranger(tenx)
    # Context manager guarantees the marker file is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
示例#5
0
 def test_tenx_full_analysis(self):
     """Smoke test: read the counts under ``tests`` and run the analysis."""
     analysis_source = TenxAnalysis("tests")
     print("Reading Counts")
     experiment = TenX.read10xCounts(analysis_source)
     # Alternative input that was disabled in the original test:
     # rdata = os.path.join(base_dir, "tests/example_sce.RData")
     # sce = SingleCellExperiment.fromRData(rdata)
     pipeline = TenX()
     print("Generating Scater Analysis")
     # The result is not inspected; the test only checks that this runs.
     scater_analysis = pipeline.analysis(experiment)
def RunUpload(sampleid, finished, species):
    """Finalize and upload the cellranger output stored under the job path.

    Args:
        sampleid: Sample identifier; output is read from
            ``<config.jobpath>/<sampleid>/outs/``.
        finished: Path of the marker file written once the upload returns.
        species: Passed through to ``TenxDataStorage``.
    """
    print("Uploading ",species, sampleid)
    tenx_output = os.path.join(config.jobpath,"{}/outs/".format(sampleid))
    tenx = TenxAnalysis(tenx_output)
    tenx.finalize()
    tenxds = TenxDataStorage(sampleid, species=species)
    print("Running upload")
    tenxds.upload_cellranger(tenx)
    # Context manager guarantees the marker file is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
示例#7
0
def cluster_markers(rdata, tenx_analysis, rep, pcs, embedding_file, prefix):
    """Plot the marker genes of every cluster.

    Marker genes are ranked with ``markers_by_clusters`` using the PCA
    representation; each cluster's gene tuple is then forwarded to
    ``plot_by_markers`` together with ``rep``, ``pcs`` and ``embedding_file``.
    """
    analysis = TenxAnalysis(tenx_analysis)
    analysis.load()
    experiment = SingleCellExperiment.fromRData(rdata)
    ranked = analysis.markers_by_clusters(experiment, rep="PCA", pcs=pcs)
    # Transpose the ranked names so each tuple holds one cluster's genes.
    genes_per_cluster = list(zip(*ranked["rank_genes_groups"]["names"]))
    for index, cluster_genes in enumerate(genes_per_cluster):
        plot_by_markers(rdata,
                        tenx_analysis,
                        cluster_genes,
                        "Cluster {} {}".format(index, prefix),
                        rep,
                        pcs,
                        embedding_file)
示例#8
0
def scvis_by_cluster_markers(rdata, tenx_analysis, prefix, pcs,
                             embedding_file):
    """Compute cluster markers on the SCVIS embedding, ignoring failures.

    The computed markers are discarded and the function always returns
    ``None``; any exception raised along the way is suppressed.
    """
    try:
        analysis = TenxAnalysis(tenx_analysis)
        analysis.load()
        experiment = SingleCellExperiment.fromRData(rdata)
        analysis.markers_by_clusters(experiment,
                                     rep="SCVIS",
                                     pcs=pcs,
                                     embedding_file=embedding_file)
    except Exception:
        # Best-effort: a failure here is deliberately not propagated.
        pass
    return None
示例#9
0
def Run(sampleid, before, finished):
    """Batch-correct the samples found for ``sampleid`` and cache the result.

    The first corrected dataset returned by ``Scanorama.correct`` is written
    as 10x output under ``.cache/corrected``.

    Args:
        sampleid: Identifier handed to ``Search``.
        before: Unused; kept for interface compatibility.
        finished: Path of the marker file written at the end.
    """
    adatas = Search(sampleid)
    print ("Correcting on {} samples.".format(len(adatas)))
    sys.stdout.flush()
    corrected = Scanorama.correct(adatas)
    sys.stdout.flush()
    # exist_ok avoids the check-then-create race of the previous version.
    os.makedirs(".cache/corrected", exist_ok=True)
    TenxAnalysis.make_10x_output(corrected[0],".cache/corrected")
    # Context manager guarantees the marker file is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
示例#10
0
def main():
    """Download the ``patient2`` dataset and run the Kallisto DE step on it.

    Sample name, output directory and FASTQ directory are hard-coded.
    """
    sample_name = "patient2"
    output_dir = "/igo_large/scratch/test_kallisto"
    fastq_root = "/igo_large/scratch/allen/bams/xfastqs2/McGilvery_Sonya__TLH_MissingLibrary_1_CB8R9ANXX/"

    storage = TenxDataStorage(sample_name, version="v2")
    storage.download()
    analysis = TenxAnalysis(storage.tenx_path)
    analysis.load()

    fastqs = FastQDirectory(fastq_root, sample_name, output_dir)
    Kallisto(fastqs, analysis).de()
示例#11
0
def cell_type_by_cluster(rdata, cell_assign_fit, tenx_analysis, prefix):
    """Plot, per cluster, the fraction of cells assigned to each cell type.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        cell_assign_fit: Path to a pickle whose object exposes ``"Barcode"``
            and ``"cell_type"`` sequences.
        tenx_analysis: Argument handed to ``TenxAnalysis``.
        prefix: Text appended to the plot title.

    Side effects:
        Writes ``figures/cell_type_by_cluster.png``.
    """
    tenx = TenxAnalysis(tenx_analysis)
    tenx.load()
    # NOTE: pickle.load executes arbitrary code; only use trusted fit files.
    with open(cell_assign_fit, "rb") as handle:
        fit = pickle.load(handle)
    cell_types = dict(zip(fit["Barcode"], fit["cell_type"]))
    sce = SingleCellExperiment.fromRData(rdata)
    cluster_labels = tenx.clusters(sce)
    clusters = dict(zip(sce.colData["Barcode"], cluster_labels))

    counts = collections.defaultdict(lambda: collections.defaultdict(int))
    # Distinct cell types in order of first appearance. The previous version
    # looped over every cell (duplicates included), producing one identical
    # row per cell for each cluster.
    observed_types = {}
    for barcode, cell in cell_types.items():
        try:
            label = str(clusters[barcode])
        except KeyError:
            # Barcode has a cell type but no cluster assignment: skip it.
            continue
        counts[label][cell] += 1
        observed_types.setdefault(cell, None)

    fclusters = []
    fcelltypes = []
    fpercentages = []
    for label, type_counts in counts.items():
        total = float(sum(type_counts.values()))
        for cell in observed_types:
            fcelltypes.append(cell)
            fclusters.append(label)
            # .get avoids inserting missing keys into the defaultdict.
            fpercentages.append(float(type_counts.get(cell, 0)) / total)
    df = pandas.DataFrame({
        "Cluster": fclusters,
        "Cell Type": fcelltypes,
        "Percentage": fpercentages
    })
    f, ax = plt.subplots(figsize=(16, 8))
    ax = sns.barplot(x="Cluster",
                     y="Percentage",
                     hue="Cell Type",
                     data=df,
                     palette="tab10")
    ax.set_title("Cell Type by Cluster - {}".format(prefix))
    plt.tight_layout()
    plt.savefig("figures/cell_type_by_cluster.png")
    # Release the figure once it has been written to disk.
    plt.close(f)
示例#12
0
 def test_symbol_retrieve(self):
     """Print the row-data keys of experiments built from four sources."""
     # 1. Counts read through the TenX wrapper.
     from_counts = TenX.read10xCounts(TenxAnalysis("tests/pre_igo"))
     print(from_counts.rowData.keys())

     # 2. A serialized example experiment.
     sce_path = os.path.join(base_dir, "tests/example_sce.rda")
     from_rdata = SingleCellExperiment.fromRData(sce_path)
     print(from_rdata.rowData.keys())

     # 3. An RS4 object produced by DropletUtils.
     droplet_utils = DropletUtils()
     rs4_object = droplet_utils.read10xCounts("tests/hg19/")
     from_rs4 = SingleCellExperiment.fromRS4(rs4_object)
     print(from_rs4.rowData.keys())

     # 4. The copy-number example, including its row and column names.
     copy_number_path = os.path.join(base_dir, "tests/example_copy_number.rda")
     copy_number = SingleCellExperiment.fromRData(copy_number_path)
     print(copy_number.rowData.keys())
     print(copy_number.rownames)
     print(copy_number.colnames)
示例#13
0
 def get_tenx(samples):
     """Download every sample and return one ``TenxAnalysis`` per sample."""

     def _fetch(sample):
         # Download first, then wrap the resulting path.
         storage = TenxDataStorage(sample)
         storage.download()
         return TenxAnalysis(storage.tenx_path)

     return [_fetch(sample) for sample in samples]
示例#14
0
 def count(fastq_object):
     """Run ``cellranger count`` for a FASTQ object and wrap its output.

     The first sample id and first lane of ``fastq_object.samples`` are used.
     The exit status of the subprocess is not checked.
     """
     args = {
         "id": fastq_object.id,
         "fastqs": fastq_object.path,
         "sample": fastq_object.samples.sampleid[0],
         "transcriptome": config.reference,
         "lanes": fastq_object.samples.lane[0],
     }
     # Chemistry is not passed; the previously disabled setting was:
     #args["chemistry"] = "SC3P_auto"
     subprocess.call(CellRanger.cmd("count", args))
     return TenxAnalysis(fastq_object.out())
示例#15
0
 def test_cell_assign_pkl(self):
     """Run CellAssign EM using a marker matrix loaded from a pickle."""
     import pickle
     tenx = TenxAnalysis("tests/pre_igo")
     sce = TenX.read10xCounts(tenx)
     # The context manager closes the fixture even if unpickling fails.
     with open("tests/rho_up.pkl", "rb") as handle:
         rho_matrix = pickle.load(handle)
     rho = GeneMarkerMatrix(rho_matrix)
     cellassigner = CellAssign()
     # The result is not inspected; the test only checks that EM runs.
     res = cellassigner.run_em(sce, rho)
示例#16
0
def Analysis(sampleid, before, finished):
    """Produce the cell-type plots for a sample from its CellAssign fit.

    Args:
        sampleid: Sample identifier; the fit is read from
            ``.cache/<sampleid>/cell_types.pkl`` and plots are written under
            ``.cache/<sampleid>/cellassignanalysis/``.
        before: Unused; kept for interface compatibility.
        finished: Path of the marker file written at the end.

    Raises:
        AssertionError: If the fit pickle does not exist.
    """
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    analysis_path = tenx.tenx_path
    tenx_analysis = TenxAnalysis(analysis_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)
    cellassign_analysis = ".cache/{}/cellassignanalysis/".format(sampleid)
    # exist_ok avoids the check-then-create race of the previous version.
    os.makedirs(cellassign_analysis, exist_ok=True)
    pyfit_path = ".cache/{}/cell_types.pkl".format(sampleid)
    assert os.path.exists(pyfit_path), "No Pyfit Found."
    # NOTE: pickle.load executes arbitrary code; only use trusted fit files.
    with open(pyfit_path, "rb") as handle:
        pyfit = pickle.load(handle)
    marker_list = GeneMarkerMatrix.read_yaml(config.rho_matrix)
    cell_types = marker_list.celltypes()
    # "B cell" is always included among the known types.
    if "B cell" not in cell_types:
        cell_types.append("B cell")
    celltypes(pyfit, sampleid, cellassign_analysis, known_types=cell_types)
    tsne_by_cell_type(qc.sce,
                      pyfit,
                      sampleid,
                      cellassign_analysis,
                      known_types=cell_types)
    umap_by_cell_type(qc.sce,
                      pyfit,
                      sampleid,
                      cellassign_analysis,
                      known_types=cell_types)
    # Context manager guarantees the marker file is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
示例#17
0
def Run(sampleid, species, umi_plot, mito_plot, ribo_plot, counts_plot,
        raw_sce):
    """Run quality control for a sample and copy its outputs to given paths.

    The UMI, mito, ribo and counts plots produced by ``QualityControl`` are
    copied to the corresponding ``*_plot`` arguments and ``qc.sce`` is copied
    to ``raw_sce``. ``species`` is not used.
    """
    print("Running QC.")
    storage = TenxDataStorage(sampleid)
    analysis = TenxAnalysis(storage.tenx_path)
    analysis.load()
    analysis.extract()
    qc = QualityControl(analysis, sampleid)
    qc.run(mito=config.mito)

    plot_dir = qc.plots
    sources = [
        os.path.join(plot_dir, name)
        for name in ("umi.png", "mito.png", "ribo.png", "counts.png")
    ]
    targets = (umi_plot, mito_plot, ribo_plot, counts_plot)

    # The results directory is created even though nothing is copied into it.
    results_dir = os.path.join(config.jobpath, "results")
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    for source, target in zip(sources, targets):
        shutil.copyfile(source, target)
    shutil.copyfile(qc.sce, raw_sce)
示例#18
0
def Run(sampleid, before, finished):
    """Collect the QC and CellAssign artefacts of a sample into a report.

    Args:
        sampleid: Sample identifier handed to ``TenxDataStorage``.
        before: Unused; kept for interface compatibility.
        finished: Path of the marker file written at the end.
    """
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    tenx_analysis = TenxAnalysis(tenx.tenx_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis,sampleid)
    plots = qc.plots
    # CellAssign figures live next to the QC plot directory.
    cellassign = os.path.join(os.path.split(plots)[0],"cellassignanalysis")
    results = Results(config.jobpath)

    results.add_analysis(tenx.tenx_path)
    results.add_sce(qc.qcdsce)

    umi = os.path.join(plots,"umi.png")
    mito = os.path.join(plots,"mito.png")
    ribo = os.path.join(plots, "ribo.png")
    total_counts = os.path.join(plots, "total_counts.png")
    tfbc = os.path.join(plots, "total_features_by_counts.png")
    tcvfc = os.path.join(plots, "total_counts_v_features_by_counts.png")
    # Named cell_types_plot so it does not shadow a `celltypes` function.
    cell_types_plot = os.path.join(cellassign, "cell_types.png")

    results.add_plot(umi,"UMI Distribution")
    results.add_plot(mito,"Mito Distribution")
    results.add_plot(ribo,"Ribo Distribution")
    results.add_plot(total_counts,"Total Counts Distribution")
    results.add_plot(tcvfc,"Total Counts")
    # Previously `tcvfc` was registered a second time here and `tfbc` was
    # never used, so this header pointed at the wrong image.
    results.add_plot(tfbc,"Total Features by Counts")
    results.add_plot(cell_types_plot,"Cell Types")

    exportMD(results)
    exportUpload(results)
    # Context manager guarantees the marker file is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
示例#19
0
def RunExtract(sample_to_path, rdata_path):
    """Run QC for the sample described in a JSON file and export its SCE.

    Args:
        sample_to_path: Path to a JSON object mapping sample id to analysis
            path. Only the last item of the mapping is used.
        rdata_path: Destination to which ``qc.sce`` is copied.
    """
    # Context manager closes the JSON file instead of leaking the handle.
    with open(sample_to_path, "r") as handle:
        sample = json.load(handle)
    sampleid, path = list(sample.items()).pop()
    tenx_analysis = TenxAnalysis(path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)
    # QC is only run when its output file is not already present.
    if not os.path.exists(qc.sce):
        qc.run(mito=config.mito)
    shutil.copyfile(qc.sce, rdata_path)
示例#20
0
def Run(sampleid, before, finished):
    """Run CellAssign on the QC'd experiment of a sample.

    Args:
        sampleid: Sample identifier; results are written to
            ``.cache/<sampleid>/celltypes.rdata``.
        before: Unused; kept for interface compatibility.
        finished: Path of the marker file written at the end.
    """
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    analysis_path = tenx.tenx_path
    tenx_analysis = TenxAnalysis(analysis_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)
    CellAssign.run(qc.sce, config.rho_matrix,
                   ".cache/{}/celltypes.rdata".format(sampleid))
    # Context manager guarantees the marker file is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
示例#21
0
 def __init__(self, sampleids, chem="v2", output="./"):
     """Download, load and extract the analysis of every sample id.

     ``chem`` is passed as the ``version`` of each ``TenxDataStorage``; the
     prepared analyses are collected in ``self.tenxs`` in input order.
     """
     self.output = output
     self.samples = sampleids
     self.tenxs = []
     for sample in self.samples:
         storage = TenxDataStorage(sample, version=chem)
         storage.download()
         prepared = TenxAnalysis(storage.tenx_path)
         prepared.load()
         prepared.extract()
         self.tenxs.append(prepared)
示例#22
0
def Run(sampleid, before, finished):
    """Run quality control for a sample and upload the results.

    Args:
        sampleid: Sample identifier handed to ``TenxDataStorage``.
        before: Unused; kept for interface compatibility.
        finished: Path of the marker file written at the end.
    """
    print("Running QC.")
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    tenx_analysis = TenxAnalysis(tenx.tenx_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    print("Extracted.")
    qc = QualityControl(tenx_analysis, sampleid)
    qc.run(mito=config.mito)
    print("Uploading")
    qc.upload_raw()
    qc.upload()
    # Context manager guarantees the marker file is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
示例#23
0
def Run(sampleid, before, finished):
    """Cluster a sample (with on-disk caching) and draw the cluster plots.

    Cluster labels are cached in
    ``.cache/<sampleid>/clustering/<sampleid>_clusters.pkl`` and reused on
    later runs.

    Args:
        sampleid: Sample identifier handed to ``TenxDataStorage``.
        before: Unused; kept for interface compatibility.
        finished: Path of the marker file written at the end.
    """
    clustering = ".cache/{}/clustering/".format(sampleid)
    # exist_ok avoids the check-then-create race of the previous version.
    os.makedirs(clustering, exist_ok=True)
    cluster_results = os.path.join(clustering,
                                   "{}_clusters.pkl".format(sampleid))
    tenx = TenxDataStorage(sampleid, version="v3")
    tenx.download()
    analysis_path = tenx.tenx_path
    tenx_analysis = TenxAnalysis(analysis_path)
    tenx_analysis.load()
    tenx_analysis.extract()
    qc = QualityControl(tenx_analysis, sampleid)
    if not os.path.exists(cluster_results):
        clusters = tenx_analysis.clusters(qc.sce)
        # Closing the handle ensures the cache is fully written to disk.
        with open(cluster_results, "wb") as handle:
            pickle.dump(clusters, handle)
    else:
        # NOTE: pickle.load executes arbitrary code; the cache is trusted.
        with open(cluster_results, "rb") as handle:
            clusters = pickle.load(handle)
    tsne_by_cluster(qc.sce, clusters, sampleid, clustering)
    umap_by_cluster(qc.sce, clusters, sampleid, clustering)
    with open(finished, "w") as marker:
        marker.write("Completed")
示例#24
0
def Run(sampleid, before, finished, use_corrected=False):
    """Run CellAssign for a sample unless its output already exists.

    Args:
        sampleid: Sample identifier; results are written to
            ``.cache/<sampleid>/celltypes.rdata``.
        before: Unused; kept for interface compatibility.
        finished: Path of the marker file written at the end.
        use_corrected: When true and ``.cache/corrected/`` exists, use the
            batch-corrected experiment (building it if missing) instead of
            the sample's own QC output.
    """
    if use_corrected and os.path.exists(".cache/corrected/"):
        sce = ".cache/corrected/corrected_sce.rdata"
        if not os.path.exists(sce):
            utils = DropletUtils()
            utils.read10xCounts(".cache/corrected/", sce)
    else:
        tenx = TenxDataStorage(sampleid, version="v3")
        tenx.download()
        analysis_path = tenx.tenx_path
        tenx_analysis = TenxAnalysis(analysis_path)
        tenx_analysis.load()
        tenx_analysis.extract()
        qc = QualityControl(tenx_analysis, sampleid)
        sce = qc.sce
    celltypes_rdata = ".cache/{}/celltypes.rdata".format(sampleid)
    if not os.path.exists(celltypes_rdata):
        CellAssign.run(sce, config.rho_matrix, celltypes_rdata)
    # Context manager guarantees the marker file is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
示例#25
0
def Analysis(sampleid, before, finished, use_corrected=False):
    """Produce the cell-type plots for a sample from its CellAssign fit.

    Args:
        sampleid: Sample identifier; the fit is read from
            ``.cache/<sampleid>/cell_types.pkl`` and plots are written under
            ``.cache/<sampleid>/cellassignanalysis/``.
        before: Unused; kept for interface compatibility.
        finished: Path of the marker file written at the end.
        use_corrected: When true and ``.cache/corrected`` exists, plot the
            batch-corrected experiment (building it if missing); otherwise
            plot ``sce_cas.rdata`` located next to the sample's QC output.

    Raises:
        AssertionError: If the fit pickle does not exist.
    """
    if use_corrected and os.path.exists(".cache/corrected"):
        sce = ".cache/corrected/corrected_sce.rdata"
        if not os.path.exists(sce):
            utils = DropletUtils()
            utils.read10xCounts(".cache/corrected/", sce)
        filtered_sce = sce
    else:
        tenx = TenxDataStorage(sampleid, version="v3")
        tenx.download()
        analysis_path = tenx.tenx_path
        tenx_analysis = TenxAnalysis(analysis_path)
        tenx_analysis.load()
        tenx_analysis.extract()
        qc = QualityControl(tenx_analysis, sampleid)
        filtered_sce = os.path.join(os.path.split(qc.sce)[0], "sce_cas.rdata")
    cellassign_analysis = ".cache/{}/cellassignanalysis/".format(sampleid)
    # exist_ok avoids the check-then-create race of the previous version.
    os.makedirs(cellassign_analysis, exist_ok=True)
    pyfit_path = ".cache/{}/cell_types.pkl".format(sampleid)
    assert os.path.exists(pyfit_path), "No Pyfit Found."
    # NOTE: pickle.load executes arbitrary code; only use trusted fit files.
    with open(pyfit_path, "rb") as handle:
        pyfit = pickle.load(handle)
    marker_list = GeneMarkerMatrix.read_yaml(config.rho_matrix)
    cell_types = marker_list.celltypes()
    # "B cell" is always included among the known types.
    if "B cell" not in cell_types:
        cell_types.append("B cell")
    celltypes(pyfit, sampleid, cellassign_analysis, known_types=cell_types)

    tsne_by_cell_type(filtered_sce,
                      pyfit,
                      sampleid,
                      cellassign_analysis,
                      known_types=cell_types)
    umap_by_cell_type(filtered_sce,
                      pyfit,
                      sampleid,
                      cellassign_analysis,
                      known_types=cell_types)
    # Context manager guarantees the marker file is flushed and closed.
    with open(finished, "w") as marker:
        marker.write("Completed")
示例#26
0
def create_workflow():
    """Assemble the pipeline workflow and the report description.

    Reads run options from ``args`` and ``config``, configures the primary
    run (BCL/FASTQ inputs, optional library aggregation), registers the
    secondary-analysis steps enabled in ``config`` and records the expected
    report artefacts in a ``Results`` object.

    Returns:
        The workflow obtained from ``secondary_analysis.get_workflow()``.

    NOTE(review): ``args``, ``compare`` and ``combine_assign`` are not
    defined inside this function; they are presumably module-level names --
    confirm they exist before enabling the branches that use them.
    """

    workflow = pypeliner.workflow.Workflow()

    # Run options; `args` is a free name here (not a parameter).
    bcl_directory = args.get("bcl", None)
    fastq_directories = args.get("fastqs")
    aggregate = args.get("aggregate_mlibs", list())
    agg_type = args.get("agg_method", "scanorama")
    libbase = args.get("lib_base", None)
    additional = args.get("additional", [])
    prefix = config.prefix
    output = config.jobpath
    recipe = args.get("recipe", "basic")

    # Best-effort creation of the cellranger folder; every error is ignored.
    try:
        cellranger_folder = os.path.join(output, prefix)
        os.makedirs(cellranger_folder)
    except Exception as e:
        pass

    if fastq_directories == None:
        fastq_directories = []

    results = Results(output)
    runner = PrimaryRun(workflow, prefix, output)
    """
    Aggregating Libraries
    """

    if aggregate != None and len(aggregate) > 0:
        if agg_type == "tenx":
            runner.aggregate_libraries_tenx(aggregate, libbase)
            # The aggregated output becomes the analysis directory read below.
            args["tenx"] = os.path.join(output, "run_{}/outs".format(prefix))
        if agg_type == "scanorama":
            runner.aggregate_libraries_scanorama()
    """
    Setup
    """
    tenx_analysis = args.get("tenx", None)

    bcls = runner.set_bcl(bcl_directory)
    fastqs = runner.set_fastq(fastq_directories)
    workflow = runner.get_workflow()

    # Same lookup as a few lines above; the value is simply re-read.
    tenx_analysis = args.get("tenx", None)

    # When FASTQs were registered, the analysis directory under the job path
    # replaces whatever `args["tenx"]` held.
    if fastqs != []:
        tenx_analysis = os.path.join(config.jobpath, prefix, "outs")

    rdata = args.get("rdata", None)

    secondary_analysis = SecondaryAnalysis(workflow, prefix, output)
    tenx = TenxAnalysis(tenx_analysis)
    """
    QC
    """

    secondary_analysis.run_scater()
    secondary_analysis.build_sce(tenx)
    secondary_analysis.set_rdata(rdata)

    results.add_analysis(tenx_analysis)
    results.add_workflow(secondary_analysis.rscript)
    results.add_sce(secondary_analysis.sce)

    # Paths of the QC figures that the report links to.
    umi = os.path.join(output, "figures/umi_distribution.png")
    mito = os.path.join(output, "figures/mito_distribution.png")
    ribo = os.path.join(output, "figures/ribo_distribution.png")
    freq = os.path.join(output, "figures/highestExprs.png")
    tech = os.path.join(output, "figures/mean_variance_trend.png")
    high_var = os.path.join(output, "figures/highly_variable_genes.png")

    results.add_plot(umi, "UMI Distribution")
    results.add_plot(mito, "Mito Distribution")
    results.add_plot(ribo, "Ribo Distribution")
    results.add_plot(freq, "Highest Frequency")
    results.add_plot(tech, "Mean Variance Trend")
    results.add_plot(high_var, "Highly Variable Genes")

    # These two artefacts are registered again inside the CellAssign branch.
    results.add_cellassign_pkl(secondary_analysis.cell_assign_fit)
    results.add_cellassign_raw(secondary_analysis.cell_assign_rdata)
    """
    Differential Expression
    """
    if config.run_de:
        other_samples = []
        # NOTE(review): `compare` is not defined in this function, and
        # exit(0) runs before run_de, so run_de is never reached here.
        for other_sample in compare:
            print("blah")
            exit(0)
            secondary_analysis.run_de(other_sample)
    """
    CellAssign
    """
    if config.run_cellassign:
        tenx = TenxAnalysis(tenx_analysis)
        if hasattr(config, "rho_matrix"):
            # SECURITY(review): eval executes the file content as Python;
            # only safe if the rho matrix file is fully trusted.
            rho_matrix = eval(open(config.rho_matrix, "r").read())
        elif hasattr(config, "tissue"):
            sce = SingleCellExperiment.fromRData(secondary_analysis.sce)
            # NOTE(review): the guard checks `config.tissue` but the call
            # reads `config.organ` -- confirm which attribute is intended.
            rho_matrix = generate_json(tenx, sce, config.organ)
        else:
            raise AssertionError("Not implemented.")
        # NOTE(review): `combine_assign` is not defined in this function
        # (a local named `additional` is read from args above).
        secondary_analysis.run_cell_assign(rho_matrix,
                                           tenx_analysis,
                                           additional=combine_assign)
        results.add_cellassign_pkl(secondary_analysis.cell_assign_fit)
        results.add_cellassign_raw(secondary_analysis.cell_assign_rdata)

        path = secondary_analysis.plot_cell_types()
        results.add_plot(path, "Cell Type Frequency")
        path = secondary_analysis.plot_cell_type_by_cluster(tenx_analysis)
        results.add_plot(path, "Cell Type by Cluster")

        path = secondary_analysis.plot_tsne_by_cell_type()
        results.add_plot(path, "TSNE by Cell Type")

        path = secondary_analysis.plot_pca_by_cell_type()
        results.add_plot(path, "PCA by Cell Type")

        # path = secondary_analysis.plot_umap_by_cell_type()
        # results.add_plot(path, "UMAP by Cell Type")

        path1, path2 = secondary_analysis.marker_analysis(tenx, rho_matrix)
        results.add_plot(path1, "Heat Marker Gene Matrix")
        results.add_plot(path2, "Stacked Vin Marker Gene Matrix")
    """
    SCVis
    """
    if config.run_scvis:
        secondary_analysis.run_scviz(config.perplexity, config.components)
    """
    CloneAlign
    """
    if config.run_clonealign and config.copy_number_data is not None and config.clone_assignments is not None:
        secondary_analysis.run_clone_align(tenx, config.copy_number_data,
                                           config.clone_assignments)

    if config.plot_scvis:
        # Embedding file name built from the perplexity and component count.
        embedding_file = "{0}_{1}/perplexity_{0}_regularizer_0.001_batch_size_512_learning_rate_0.01_latent_dimension_2_activation_ELU_seed_1_iter_3000.tsv".format(
            config.perplexity, config.components)
        path = secondary_analysis.plot_scvis_by_cluster(tenx_analysis,
                                                        embedding_file,
                                                        pcs=config.components)
        path = os.path.join(output, path)
        results.add_plot(path, "SCVis by Cluster")

        # NOTE(review): `config.run_cellassign` is used as an on/off flag
        # above but is tested as a filesystem path here -- confirm intent.
        if os.path.exists(config.run_cellassign):
            path = secondary_analysis.plot_scvis_by_cell_type(
                embedding_file, pcs=config.components)
            results.add_plot(path, "SCVIS by Cell Type")
    """
    Cluster Analysis
    """
    if config.clustering:
        path = secondary_analysis.plot_pca_by_cluster(tenx_analysis,
                                                      pcs=config.components)
        results.add_plot(path, "PCA by Cluster")

        path = secondary_analysis.plot_tsne_by_cluster(tenx_analysis,
                                                       pcs=config.components)
        results.add_plot(path, "TSNE by Cluster")

        path = secondary_analysis.plot_umap_by_cluster(tenx_analysis,
                                                       pcs=config.components)
        results.add_plot(path, "UMAP by Cluster")

        # For each representation: register the marker plotting step, then
        # add every matching PNG currently found under figures/expression
        # (relative to the working directory) with a title derived from the
        # file name.
        secondary_analysis.plot_cluster_markers(tenx_analysis,
                                                rep="PCA",
                                                pcs=config.components)

        pca_cluster_markers = glob.glob("figures/expression/*pca*png")
        for png in pca_cluster_markers:
            title = png.split("/")[-1].replace(".png", "").replace(
                "counts", "gene markers").upper().replace("_", "")
            results.add_plot(png, title)

        secondary_analysis.plot_cluster_markers(tenx_analysis,
                                                rep="TSNE",
                                                pcs=config.components)

        pca_cluster_markers = glob.glob("figures/expression/*tsne*png")
        for png in pca_cluster_markers:
            title = png.split("/")[-1].replace(".png", "").replace(
                "counts", "gene markers").upper().replace("_", "")
            results.add_plot(png, title)

        secondary_analysis.plot_cluster_markers(tenx_analysis,
                                                rep="UMAP",
                                                pcs=config.components)

        pca_cluster_markers = glob.glob("figures/expression/*umap*png")
        for png in pca_cluster_markers:
            title = png.split("/")[-1].replace(".png", "").replace(
                "counts", "gene markers").upper().replace("_", "")
            results.add_plot(png, title)

        embedding_file = "{0}_{1}/perplexity_{0}_regularizer_0.001_batch_size_512_learning_rate_0.01_latent_dimension_2_activation_ELU_seed_1_iter_3000.tsv".format(
            config.perplexity, config.components)
        secondary_analysis.plot_cluster_markers(tenx_analysis,
                                                rep="SCVIS",
                                                pcs=config.components,
                                                embedding_file=embedding_file)

        # NOTE(review): the "5_50" in this pattern is hard-coded rather than
        # derived from config.perplexity / config.components.
        pca_cluster_markers = glob.glob("figures/expression/*scvis_5_50*png")
        for png in pca_cluster_markers:
            title = png.split("/")[-1].replace(".png", "").replace(
                "counts", "gene markers").upper().replace("_", "")
            results.add_plot(png, title)
    """
    Gene Level
    """
    """
    Reporting
    """
    # Both report steps are added to the `workflow` bound from
    # runner.get_workflow() above.
    if config.report:
        workflow.transform(name="{}_markdown".format(prefix),
                           func=exportMD,
                           args=(results, ))

    if config.report:
        workflow.transform(name="{}_finalize".format(prefix),
                           func=exportFinalize,
                           args=(results, ))

    # The returned workflow is the one held by the secondary analysis.
    workflow = secondary_analysis.get_workflow()
    return workflow
示例#27
0
def plot_by_markers(rdata,
                    tenx_analysis,
                    genes,
                    prefix,
                    rep,
                    pcs,
                    embedding_file=None,
                    k=12):
    """Save a grid of scatter plots, one per marker gene, coloured by
    min-max scaled ``logcounts``.

    Args:
        rdata: Path handed to ``SingleCellExperiment.fromRData``.
        tenx_analysis: Argument handed to ``TenxAnalysis``.
        genes: Sequence of gene names; only the first ``k`` are plotted.
        prefix: Label used (lower-cased, spaces replaced by underscores) in
            the output file name.
        rep: Representation to plot. ``"SCVIS"`` reads coordinates from
            ``embedding_file``; anything else is looked up in the experiment.
        pcs: Unused; kept for interface compatibility.
        embedding_file: SCVIS embedding, only used when ``rep == "SCVIS"``.
        k: Maximum number of genes to plot.

    Side effects:
        Prints progress information and writes
        ``figures/expression/<prefix>_counts_<rep>.png``.
    """
    genes = list(genes[:k])
    sce = SingleCellExperiment.fromRData(rdata)
    counts = sce.assays["logcounts"].toarray()
    tenx = TenxAnalysis(tenx_analysis)
    if rep == "SCVIS":
        tsne_dims = tenx.get_scvis_dimensions(embedding_file)
    else:
        tsne_dims = sce.getReducedDims(rep)
    all_genes = tenx.get_genes(sce)
    barcodes = sce.colData["Barcode"]
    x_coded = dict(zip(barcodes, tsne_dims[0]))
    y_coded = dict(zip(barcodes, tsne_dims[1]))
    # exist_ok avoids the check-then-create race of the previous version.
    os.makedirs("figures/expression", exist_ok=True)
    x = []
    y = []
    f = plt.figure()
    # A set gives O(1) membership tests when filtering expression below.
    dropped = set()
    for position, barcode in enumerate(barcodes):
        try:
            x_val = x_coded[barcode]
            y_val = y_coded[barcode]
        except KeyError:
            # zip() truncates to the shorter input, so barcodes without
            # coordinates are missing from the lookup tables; skip them.
            dropped.add(position)
            continue
        x.append(x_val)
        y.append(y_val)
        print("Barcode {}".format(barcode))
    scale = preprocessing.MinMaxScaler()
    counts = scale.fit_transform(counts)
    # Keep the historical 3x4 grid, but add rows when k exceeds 12 so that
    # plt.subplot does not fail on an out-of-range index.
    n_rows = max(3, -(-len(genes) // 4))
    for slot, gene in enumerate(genes):
        gene_counts = counts[all_genes.index(gene)]
        plt.subplot(n_rows, 4, slot + 1)
        scaled = scale.fit_transform(
            numpy.array(gene_counts).reshape(-1, 1))
        # Drop the cells that have no coordinates so lengths match x and y.
        expression = [
            value for position, value in enumerate(scaled.flatten())
            if position not in dropped
        ]
        print(len(expression))
        print(len(barcodes))
        print(len(x))
        print(len(y))
        g = sns.scatterplot(x=x,
                            y=y,
                            hue=expression,
                            palette="RdYlBu_r",
                            alpha=0.7,
                            legend=False,
                            s=4)
        g.set(xticklabels=[])
        g.set(yticklabels=[])
        plt.title(gene)
        print("Gene {}".format(gene))
    plt.tight_layout()
    plt.savefig("figures/expression/{}_counts_{}.png".format(
        prefix.replace(" ", "_").lower(), rep.lower()))
    # Release the figure once it has been written to disk.
    plt.close(f)
示例#28
0
def umap_by_cluster_markers(rdata, tenx_analysis, prefix, pcs):
    """Compute cluster markers on the UMAP representation and print the keys
    of the result. ``prefix`` is not used."""
    analysis = TenxAnalysis(tenx_analysis)
    analysis.load()
    experiment = SingleCellExperiment.fromRData(rdata)
    marker_table = analysis.markers_by_clusters(experiment,
                                                rep="UMAP",
                                                pcs=pcs)
    print(marker_table.keys())
示例#29
0
class Results(object):
    """Collects the artefacts of a run that belong in its report.

    Each ``add_*`` method records a ``(source, destination)`` pair in
    ``self.paths``; the destination is the source's file name inside
    ``self.report_dir``. ``finalize`` performs the copies.
    """

    def __init__(self, output):
        """Prepare the report directory under ``config.jobpath``.

        NOTE(review): ``output`` is accepted but not used; all locations are
        derived from ``config.jobpath`` and ``config.prefix``.
        """
        self.plots = list()
        self.output = config.jobpath
        self.report_dir = os.path.join(config.jobpath,"{}_report/".format(config.prefix))
        try:
            os.makedirs(self.report_dir)
        except OSError:
            # Best-effort: typically the directory already exists.
            pass
        self.paths = []

    def _stage(self, source):
        """Schedule ``source`` for copying and return its file name."""
        name = os.path.split(source)[1]
        self.paths.append((source, os.path.join(self.report_dir, name)))
        return name

    def qc_reports(self):
        """Yield the HTML files matching ``fastqc/*/*.html`` under the output."""
        for html in glob.glob(os.path.join(self.output, "fastqc/*/*.html")):
            yield html

    def add_analysis(self, tenx):
        """Register the summary of the analysis at ``tenx`` as summary.html."""
        self.analysis = TenxAnalysis(tenx)
        summary = self.analysis.summary()
        dest = os.path.join(self.report_dir, "summary.html")
        self.paths.append((summary,dest))
        self.summary = "summary.html"

    def add_workflow(self, script):
        """Register the workflow script; its file name is kept in ``script``."""
        self.script = self._stage(script)

    def add_sce(self, sce):
        """Register the experiment file; its file name is kept in ``sce``."""
        self.sce = self._stage(sce)

    def add_cellassign_pkl(self, pkl):
        """Register the CellAssign pickle; its file name is kept in ``pkl``."""
        self.pkl = self._stage(pkl)

    def add_cellassign_raw(self, raw):
        """Register the raw CellAssign output; file name kept in ``raw``."""
        self.raw = self._stage(raw)

    def add_plot(self, path, header, desc=""):
        """Register a plot together with its report header and description."""
        self.plots.append({
            "path": self._stage(path),
            "header": header,
            "desc": desc,
        })

    def finalize(self):
        """Copy every registered file into the report directory.

        Copying is deliberately best-effort: a source that is missing or
        invalid is skipped so that the remaining files are still copied.
        """
        for source, dest in self.paths:
            try:
                shutil.copyfile(source, dest)
            except Exception:
                continue

    def barcode_to_celltype(self):
        """Write a barcode-to-cell-type TSV and return its path.

        ``self.pkl`` only holds a file name, so when that name does not
        resolve from the working directory the copy inside the report
        directory is tried instead.
        """
        tsv = os.path.join(self.output,"barcode_to_celltype.tsv")
        pkl_path = self.pkl
        if not os.path.exists(pkl_path):
            pkl_path = os.path.join(self.report_dir, self.pkl)
        # NOTE: pickle.load executes arbitrary code; only use trusted files.
        with open(pkl_path, "rb") as handle:
            cell_assignments = pickle.load(handle)
        with open(tsv, "w") as output:
            for barcode, ctype in zip(cell_assignments["Barcode"],cell_assignments["cell_type"]):
                output.write("{}\t{}\n".format(barcode,ctype))
        return tsv
示例#30
0
 def add_analysis(self, tenx):
     """Register the summary of the analysis at ``tenx`` for the report.

     The summary is scheduled to be copied to ``summary.html`` inside the
     report directory.
     """
     report_name = "summary.html"
     self.analysis = TenxAnalysis(tenx)
     source = self.analysis.summary()
     self.paths.append((source, os.path.join(self.report_dir, report_name)))
     self.summary = report_name