示例#1
0
    def doGeneSetEnrichment(self, request, context):
        """Run an AUCell gene set enrichment and stream progress updates.

        This method is a generator. Before each stage it yields whatever
        ``gse.update_state(...)`` returns for that stage; the last yielded
        state carries the computed AUC values.

        Args:
            request: Object exposing ``geneSetFilePath`` (joined onto the
                gene sets directory) and ``loomFilePath``.
            context: Not used by this method.

        Yields:
            The result of ``gse.update_state(...)`` for each stage (steps -1,
            0, then either 1 / 2.1 / 2.2 or 2, then 3 and 4). ``values`` is
            ``None`` for every step except the final one.
        """
        gene_set_file_path = os.path.join(self.dfh.get_gene_sets_dir(),
                                          request.geneSetFilePath)
        loom = self.lfh.get_loom(loom_file_path=request.loomFilePath)
        gse = _gse.GeneSetEnrichment(scope=self,
                                     method="AUCell",
                                     loom=loom,
                                     gene_set_file_path=gene_set_file_path,
                                     annotation='')

        # Running AUCell...
        yield gse.update_state(step=-1,
                               status_code=200,
                               status_message="Running AUCell...",
                               values=None)
        # NOTE(review): the one-second pauses look like they exist so the
        # client can display each step -- confirm before removing them.
        time.sleep(1)

        # Reading gene set...
        yield gse.update_state(step=0,
                               status_code=200,
                               status_message="Reading the gene set...",
                               values=None)
        with open(gse.gene_set_file_path, 'r') as f:
            # Skip first line because it contains the name of the signature.
            # Blank lines (e.g. a trailing newline) are dropped so they do not
            # end up as an empty-string gene name in the signature.
            genes = [
                line.strip() for idx, line in enumerate(f)
                if idx > 0 and line.strip()
            ]
        gs = GeneSignature(name='Gene Signature #1', gene2weight=genes)
        time.sleep(1)

        if not gse.has_AUCell_rankings():
            # Creating the matrix as DataFrame...
            yield gse.update_state(step=1,
                                   status_code=200,
                                   status_message="Creating the matrix...",
                                   values=None)
            # NOTE(review): this fetches the same loom file as above; it is
            # kept in case get_loom has side effects -- confirm and reuse
            # ``loom`` if it does not.
            loom = self.lfh.get_loom(loom_file_path=request.loomFilePath)
            # Transposed so that rows line up with the CellID index and
            # columns with the gene names used below.
            dgem = np.transpose(loom.get_connection()[:, :])
            ex_mtx = pd.DataFrame(data=dgem,
                                  index=loom.get_ca_attr_by_name("CellID"),
                                  columns=loom.get_genes())
            # Creating the rankings...
            start_time = time.time()
            yield gse.update_state(step=2.1,
                                   status_code=200,
                                   status_message="Creating the rankings...",
                                   values=None)
            rnk_mtx = create_rankings(ex_mtx=ex_mtx)
            # Saving the rankings...
            yield gse.update_state(step=2.2,
                                   status_code=200,
                                   status_message="Saving the rankings...",
                                   values=None)
            # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values``
            # returns the same ndarray on every pandas version.
            lp.create(gse.get_AUCell_ranking_filepath(), rnk_mtx.values,
                      {"CellID": loom.get_cell_ids()},
                      {"Gene": loom.get_genes()})
            print("Debug: %s seconds elapsed ---" % (time.time() - start_time))
        else:
            # Load the rankings...
            yield gse.update_state(
                step=2,
                status_code=200,
                status_message="Rankings exists: loading...",
                values=None)
            rnk_loom = self.lfh.get_loom_connection(
                gse.get_AUCell_ranking_filepath())
            # The rankings file stores CellID as a row attribute and Gene as
            # a column attribute (see the lp.create call in the other branch).
            rnk_mtx = pd.DataFrame(data=rnk_loom[:, :],
                                   index=rnk_loom.ra.CellID,
                                   columns=rnk_loom.ca.Gene)

        # Calculating AUCell enrichment...
        start_time = time.time()
        yield gse.update_state(
            step=3,
            status_code=200,
            status_message="Calculating AUCell enrichment...",
            values=None)
        aucs = enrichment(rnk_mtx, gs).loc[:, "AUC"].values

        print("Debug: %s seconds elapsed ---" % (time.time() - start_time))
        yield gse.update_state(step=4,
                               status_code=200,
                               status_message=gse.get_method() +
                               " enrichment done!",
                               values=aucs)
示例#2
0
def test_create_rankings():
    """Check row-sum invariants of the frame returned by ``create_rankings``.

    Every row of the ranking frame must sum to the same value, and after
    shifting all entries by one that sum must equal ``n * (n + 1) / 2`` with
    ``n`` the number of columns of the expression matrix -- as would hold if
    each row contained the ranks ``0 .. n-1`` exactly once.
    """
    expression = exp_matrix()
    rankings = create_rankings(expression)
    gene_count = expression.shape[1]

    # All rows share a single distinct sum.
    distinct_row_sums = rankings.sum(axis=1).unique()
    assert len(distinct_row_sums) == 1

    # With one-based ranks the shared sum is the n-th triangular number.
    expected_total = (gene_count * (gene_count + 1)) / 2.0
    one_based_sums = (rankings + 1).sum(axis=1).unique()
    assert one_based_sums[0] == expected_total