示例#1
0
    def on_params_is_valid(self, exp, *args, **kwargs):
        """Load the uploaded GMT file and expose it as the ``gene_sets`` output.

        The file returned by ``self.upload_gs.get_file()`` is parsed with
        ``GmtStorage.read_inp`` (a tab character is passed as second
        argument), stored in a ``GeneSets`` object created from the
        experiment's data folder and this block's uuid, and published via
        ``set_out_var``.  Any exception raised during these steps is reported
        through ``exp.log`` (severity CRITICAL) and ``log.error`` rather than
        propagated.  ``exp.store_block(self)`` is called afterwards.

        @param exp: experiment providing the data folder, log and block store
        """
        try:
            parsed = GmtStorage.read_inp(self.upload_gs.get_file(), "\t")
            target = GeneSets(exp.get_data_folder(), str(self.uuid))
            target.store_gs(parsed)
            self.set_out_var("gene_sets", target)
        except Exception as err:
            # Best effort: report the failure, then still persist the block.
            exp.log(self.uuid, err, severity="CRITICAL")
            log.error(err)

        exp.store_block(self)
示例#2
0
    def on_params_is_valid(self, exp, *args, **kwargs):
        """Parse the uploaded gene-set file and publish it as ``gene_sets``.

        Steps: fetch the file from ``self.upload_gs``, read it with
        ``GmtStorage.read_inp`` (tab character as second argument), store the
        result in a ``GeneSets`` object located in the experiment data folder
        under this block's uuid, and set it as the ``gene_sets`` output
        variable.  Exceptions from any step are logged to the experiment
        (CRITICAL) and to ``log`` instead of being raised.  The block is then
        stored on the experiment.

        @param exp: experiment providing the data folder, log and block store
        """
        try:
            uploaded = self.upload_gs.get_file()
            contents = GmtStorage.read_inp(uploaded, "\t")
            holder = GeneSets(exp.get_data_folder(), str(self.uuid))
            holder.store_gs(contents)
            self.set_out_var("gene_sets", holder)
        except Exception as failure:
            # The failure is only reported; storing the block still happens.
            exp.log(self.uuid, failure, severity="CRITICAL")
            log.error(failure)

        exp.store_block(self)
示例#3
0
def map_gene_sets_to_probes(exp, block,
                            base_dir, base_filename, ann_gene_sets, src_gene_sets):
    """
    Translate the members of each source gene set into probe ids.

    The ``genes`` mapping of ``ann_gene_sets`` is inverted with
    ``transpose_dict_list`` and used as a lookup from gene id to probe ids.
    Every set of ``src_gene_sets`` is replaced by the union of the probes
    found for its members; sets that end up empty are dropped.

    TODO: working units check

    @param exp: not used by this function
    @param block: not used by this function
    @param base_dir: first argument for the resulting GeneSets object
    @param base_filename: second argument for the resulting GeneSets object

    @type ann_gene_sets: GeneSets
    @type src_gene_sets: GeneSets

    @return: ([GeneSets], {}) -- the stored probe-level gene sets and an
        empty dict
    """
    probes_by_gene = transpose_dict_list(ann_gene_sets.get_gs().genes)

    mapped = GeneSets(base_dir, base_filename)
    # Organism and set units are inherited; only the gene units change.
    mapped.metadata["org"] = src_gene_sets.metadata["org"]
    mapped.metadata["gene_units"] = GeneUnits.PROBE_ID
    mapped.metadata["set_units"] = src_gene_sets.metadata["set_units"]

    probe_gs = GS()
    source = src_gene_sets.get_gs()
    for name, members in source.genes.iteritems():
        probes = set()
        for gene_id in members:
            # Ids without an annotation entry contribute nothing.
            probes.update(probes_by_gene.get(gene_id, []))
        if not probes:
            continue
        probe_gs.genes[name] = list(probes)
        probe_gs.description[name] = source.description[name]

    mapped.store_gs(probe_gs)
    return [mapped], {}
示例#4
0
def threshold_task(
    exp,
    block,
    es,
    T,
    base_filename,
):
    """Derive gene sets from the assay data of ``es`` using threshold ``T``.

    Original summary: computes co-comodules from matrix H by given
    threshold T.

    The assay data frame is printed, handed together with ``T`` to
    ``EnrichmentInGeneSets(1).getGeneSet`` and the outcome is stored as a
    ``GS`` (used for both constructor arguments) inside a ``GeneSets``
    object placed in the experiment data folder under ``base_filename``.

    @param exp: experiment; supplies the data folder
    @param block: not used by this function
    @param es: object providing ``get_assay_data_frame()``
    @param T: threshold forwarded to ``getGeneSet``
    @param base_filename: name for the resulting GeneSets object
    @return: ([GeneSets], {})
    """
    if settings.CELERY_DEBUG:
        # Developer-only hook: attach to a remote pydevd debugger.
        import sys
        debug_egg = '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        sys.path.append(debug_egg)
        import pydevd
        pydevd.settrace('localhost', port=6901,
                        stdoutToServer=True, stderrToServer=True)

    assay_frame = es.get_assay_data_frame()
    print(assay_frame)

    # Imported lazily, as in the original implementation.
    from wrappers.snmnmf.evaluation import EnrichmentInGeneSets
    thresholded = EnrichmentInGeneSets(1).getGeneSet(assay_frame, T)

    stored_sets = GeneSets(exp.get_data_folder(), base_filename)
    stored_sets.store_gs(GS(thresholded, thresholded))
    return [stored_sets], {}
示例#5
0
def threshold_task(exp, block,
                     es,
                     T,
                     base_filename,
    ):
    """Apply threshold ``T`` to the assay matrix of ``es`` and store gene sets.

    Original summary: computes co-comodules from matrix H by given
    threshold T.

    The assay data frame of ``es`` is printed and passed with ``T`` to
    ``EnrichmentInGeneSets(1).getGeneSet``.  The returned value becomes
    both arguments of a ``GS`` object, which is stored in a ``GeneSets``
    created from the experiment data folder and ``base_filename``.

    @param exp: experiment; supplies the data folder
    @param block: not used by this function
    @param es: object providing ``get_assay_data_frame()``
    @param T: threshold forwarded to ``getGeneSet``
    @param base_filename: name for the resulting GeneSets object
    @return: ([GeneSets], {})
    """
    if settings.CELERY_DEBUG:
        # Developer-only hook: connect to a PyCharm/pydevd debug server.
        import sys
        sys.path.append(
            '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        )
        import pydevd
        trace_options = dict(port=6901, stdoutToServer=True, stderrToServer=True)
        pydevd.settrace('localhost', **trace_options)

    matrix = es.get_assay_data_frame()
    print(matrix)

    # Local import kept at the original position in the call sequence.
    from wrappers.snmnmf.evaluation import EnrichmentInGeneSets
    enrichment = EnrichmentInGeneSets(1)
    selected = enrichment.getGeneSet(matrix, T)

    output = GeneSets(exp.get_data_folder(), base_filename)
    payload = GS(selected, selected)
    output.store_gs(payload)

    return [output], {}
示例#6
0
def pattern_filter_task(exp, block,
            m_rna_es,
            mi_rna_es,
            gene_sets,
            metric,
            n_best,
            base_filename):
    """
        Filter candidate gene sets with ``pattern_filter`` and store the
        selected ones as a new GeneSets object.

        The candidate sets are the values of ``gene_sets.get_gs().genes``;
        they are passed to ``pattern_filter`` together with the assay data of
        ``m_rna_es`` (indexed by its first column), the ``User_class``
        phenotype column, ``n_best`` and ``metric``.  The returned sets are
        keyed by their position and stored.

        @param exp: experiment; supplies the data folder
        @param block: not used by this function
        @param base_filename: name for the resulting GeneSets object

        @type m_rna_es: ExpressionSet
        @type mi_rna_es: ExpressionSet (not used by this function)
        @type gene_sets: GeneSets
        @type metric: metric
        @type n_best: int

        @return: ([GeneSets], {})
    """
    if settings.CELERY_DEBUG:
        # Developer-only hook: attach to a remote pydevd debugger.
        import sys
        sys.path.append('/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg')
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    mData = m_rna_es.get_assay_data_frame()
    pheno = m_rna_es.get_pheno_data_frame()
    classes = pheno['User_class'].values

    data = mData
    # Promote the first column to the index (in place, column dropped).
    data.set_index(data.columns[0], inplace=True, drop=True)

    # data = zscore(data)
    # The previous code read an undefined name ``comodule_set`` here; the
    # candidate sets arrive through the ``gene_sets`` parameter.
    # NOTE(review): confirm get_gs() needs no extra arguments in this version.
    com_set = gene_sets.get_gs().genes

    result = pattern_filter(com_set.values(), data, classes, n_best, metric)

    # Key each selected set by its position in the filter output.
    result = dict(enumerate(result))
    gs = GS(None, result)
    cs = GeneSets(exp.get_data_folder(), base_filename)

    # A GS payload is stored with store_gs, as everywhere else in the project.
    cs.store_gs(gs)

    return [cs], {}
示例#7
0
def pattern_filter_task(exp, block, m_rna_es, mi_rna_es, gene_sets, metric,
                        n_best, base_filename):
    """
        Select gene sets with ``pattern_filter`` and store them.

        The values of ``gene_sets.get_gs(conv=False).genes`` are filtered
        against the assay data and ``User_class`` labels of ``m_rna_es``;
        the selected sets are keyed by position and stored in a new GeneSets
        object named ``base_filename`` in the experiment data folder.

        @param exp: experiment; supplies the data folder
        @param block: not used by this function

        @type m_rna_es: ExpressionSet
        @type mi_rna_es: ExpressionSet (not used by this function)
        @type gene_sets: GeneSets
        @type metric: metric
        @type n_best: int

        @return: ([GeneSets], {})
    """
    if settings.CELERY_DEBUG:
        # Developer-only hook: attach to a remote pydevd debugger.
        import sys
        debug_egg = '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        sys.path.append(debug_egg)
        import pydevd
        pydevd.settrace('localhost', port=6901,
                        stdoutToServer=True, stderrToServer=True)

    expression = m_rna_es.get_assay_data_frame()
    labels = m_rna_es.get_pheno_data_frame()['User_class'].values
    candidates = gene_sets.get_gs(conv=False).genes

    selected = pattern_filter(candidates.values(), expression, labels,
                              n_best, metric)

    # Key each selected set by its position in the filter output.
    indexed = dict(enumerate(selected))
    payload = GS(indexed, indexed)
    stored = GeneSets(exp.get_data_folder(), base_filename)
    stored.store_gs(payload)

    return [stored], {}
示例#8
0
def pattern_search(exp, block,
            m_rna_es,
            mi_rna_es,
            gene2gene,
            miRNA2gene,
            # gene_platform,
            # miRNA_platform,
            radius,
            min_imp,
            metric,
            base_filename):
    """
        Run a differential pattern search on the mRNA expression data and
        store the found patterns as gene sets.

        Progress is reported through ``AllUpdated`` notifications and
        ``exp.log``.  The interaction object is converted with
        ``get_matrix_for_platform`` for the columns of the assay data frame,
        the data is indexed by its first column and z-scored, and
        ``DifferentialPatternSearcher.search`` is run against the
        ``User_class`` labels.  Each pattern's gene indices are translated
        back to column names and the sets are stored keyed by position.

        ``mi_rna_es``, ``miRNA2gene``, ``metric`` and ``base_filename`` are
        accepted but not used by the active code.

        @type m_rna_es: ExpressionSet
        @type mi_rna_es: ExpressionSet
        @type gene2gene: BinaryInteraction
        @type miRNA2gene: BinaryInteraction
        @type radius: int
        @type min_imp: double

        @return: ([GeneSets], {})
    """
    if settings.CELERY_DEBUG:
        # Developer-only hook: attach to a remote pydevd debugger.
        import sys
        sys.path.append('/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg')
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    AllUpdated(
        exp.pk,
        comment=u"Initializing data...",
        silent=False,
        mode=NotifyMode.INFO
    ).send()

    exp.log(block.uuid, "Initializing data...")

    mData = m_rna_es.get_assay_data_frame()
    # Captured before set_index below removes the first column.
    gene_platform = list(mData.columns)
    AllUpdated(
        exp.pk,
        comment=u"Transforming interaction matrix",
        silent=False,
        mode=NotifyMode.INFO
    ).send()

    gene2gene = gene2gene.get_matrix_for_platform(exp, gene_platform)

    AllUpdated(
        exp.pk,
        comment=u"Transforming interaction matrix done",
        silent=False,
        mode=NotifyMode.INFO
    ).send()

    # TODO fix pattern search
    # if miRNA2gene is not None:
    #     miRNA2gene = miRNA2gene.load_matrix().T
    #     miRNA2gene = sp.coo_matrix(miRNA2gene.values)
    # if mi_rna_es is not None:
    #     miData = mi_rna_es.get_assay_data_frame()
    #     mir2gene = miRNA2gene
    #     mir2gene = sp.coo_matrix(mir2gene.values).T
    #     nw = mergeNetworks(gene2gene, mir2gene)
    # else:
    nw = gene2gene
    # data = mData.ix[1:]
    data = mData
    # Promote the first column to the index (in place, column dropped).
    data.set_index(data.columns[0], inplace=True, drop=True)

    data = zscore(data)
    pheno = m_rna_es.get_pheno_data_frame()
    classes = pheno['User_class'].values
    exp.log(block.uuid, "Data ready. Running Pattern Search")

    # Searcher initialisation (metric=metric is currently not passed).
    searcher = DifferentialPatternSearcher(nw, radius=radius, min_improve=min_imp,
                                           base_dir="orig_interactions/", verbose=True)

    # The actual search.
    res = searcher.search(data, classes)
    exp.log(block.uuid, "Pattern search finished.")

    # res is a list of patterns; for our purposes each pattern is reduced
    # to the platform names of its genes.
    comodule_set = map(lambda pattern: [gene_platform[gene] for gene in pattern.genes], res)

    # cs = ComoduleSet(exp.get_data_folder(), base_filename)
    # NOTE(review): the file name comes from exp.uuid and base_filename is
    # ignored -- confirm this is intended.
    gene_sets = GeneSets(exp.get_data_folder(), str(exp.uuid))
    result = {key: value for key, value in enumerate(comodule_set)}
    gs = GS(None, result)
    gene_sets.store_gs(gs)

    # self.set_out_var("gene_sets", gene_sets)
    # result = {key: value for key, value in enumerate(comodule_set)}
    # cs.store_set(result)
    # exp.log(block.uuid, "ComoduleSet stored.")

    # Return the stored GeneSets object (previously the raw GS payload was
    # returned, unlike every other task, leaving gene_sets unused).
    return [gene_sets], {}
示例#9
0
文件: models.py 项目: craky/miXGENE
 def get_gene_sets(self):
     """Return a GeneSets object backed by this record's GMT file.

     The object is created with no base dir / filename; its storage is a
     ``GmtStorage`` on ``self.gmt_file.path`` and its ``gene_units``
     metadata is taken from ``self.unit``.
     """
     result = GeneSets(None, None)
     gmt_path = self.gmt_file.path
     result.storage = GmtStorage(gmt_path)
     result.metadata["gene_units"] = self.unit
     return result
示例#10
0
def pattern_search(
        exp,
        block,
        m_rna_es,
        mi_rna_es,
        gene2gene,
        miRNA2gene,
        # gene_platform,
        # miRNA_platform,
        radius,
        min_imp,
        number_of_genes,
        metric,
        base_filename):
    """
        Run a seeded differential pattern search on the mRNA expression data
        and store the found patterns as gene sets.

        Progress is reported through ``AllUpdated`` notifications and
        ``exp.log``.  The interaction object is converted with
        ``get_matrix_for_platform`` (symmetrize=True, identifiers=False) for
        the columns of the assay data frame and turned into CSR form.  The
        data is indexed by its first column and z-scored.
        ``number_of_genes`` seeds are drawn without replacement from the
        unique values of ``nw.indices`` and the search is run against the
        ``User_class`` labels.  Each pattern's gene indices are mapped to
        column names and the sets are stored, keyed by position, in a
        GeneSets object named ``<block.uuid>_ps_gene_sets``.

        ``mi_rna_es``, ``miRNA2gene``, ``metric`` and ``base_filename`` are
        accepted but not used by the active code.

        @type m_rna_es: ExpressionSet
        @type mi_rna_es: ExpressionSet
        @type gene2gene: BinaryInteraction
        @type miRNA2gene: BinaryInteraction
        @type radius: int
        @type min_imp: double
        @param number_of_genes: number of seeds passed to np.random.choice

        @return: ([GeneSets], {})
    """

    AllUpdated(exp.pk,
               comment=u"Initializing data...",
               silent=False,
               mode=NotifyMode.INFO).send()

    exp.log(block.uuid, "Initializing data...")

    mData = m_rna_es.get_assay_data_frame()
    # Captured before set_index below removes the first column.
    # NOTE(review): confirm pattern.genes indices line up with this list.
    gene_platform = list(mData.columns)
    AllUpdated(exp.pk,
               comment=u"Transforming interaction matrix",
               silent=False,
               mode=NotifyMode.INFO).send()

    gene2gene = gene2gene.get_matrix_for_platform(exp,
                                                  gene_platform,
                                                  symmetrize=True,
                                                  identifiers=False)

    AllUpdated(exp.pk,
               comment=u"Transforming interaction matrix done",
               silent=False,
               mode=NotifyMode.INFO).send()

    # TODO fix pattern search
    # if miRNA2gene is not None:
    #     miRNA2gene = miRNA2gene.load_matrix().T
    #     miRNA2gene = sp.coo_matrix(miRNA2gene.values)
    # if mi_rna_es is not None:
    #     miData = mi_rna_es.get_assay_data_frame()
    #     mir2gene = miRNA2gene
    #     mir2gene = sp.coo_matrix(mir2gene.values).T
    #     nw = mergeNetworks(gene2gene, mir2gene)
    # else:
    # gene2gene = gene2gene.load_matrix()
    # nw = sparse_df_to_saprse_matrix(gene2gene)
    nw = gene2gene.tocsr()
    # data = mData.ix[1:]
    data = mData
    # Promote the first column to the index (in place, column dropped).
    data.set_index(data.columns[0], inplace=True, drop=True)

    data = zscore(data)
    pheno = m_rna_es.get_pheno_data_frame()
    classes = pheno['User_class'].values
    if settings.CELERY_DEBUG:
        # Developer-only hook: attach to a remote pydevd debugger.
        import sys
        sys.path.append(
            '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        )
        import pydevd
        pydevd.settrace('localhost',
                        port=6901,
                        stdoutToServer=True,
                        stderrToServer=True)

    exp.log(block.uuid, "Data ready. Running Pattern Search")
    # Seeds are sampled without replacement from the unique nw.indices values.
    seeds = np.random.choice(np.unique(nw.indices),
                             number_of_genes,
                             replace=False)
    # Searcher initialisation (metric=metric is currently not passed).
    searcher = DifferentialPatternSearcher(nw,
                                           radius=radius,
                                           min_improve=min_imp,
                                           seeds=seeds,
                                           base_dir="orig_interactions/",
                                           verbose=True)

    # The actual search.
    res = searcher.search(data, classes)
    exp.log(block.uuid, "Pattern search finished.")

    # res is a list of patterns; for our purposes each pattern is reduced
    # to the platform names of its genes.
    comodule_set = map(
        lambda pattern: [gene_platform[gene] for gene in pattern.genes], res)

    # cs = ComoduleSet(exp.get_data_folder(), base_filename)
    gene_sets = GeneSets(exp.get_data_folder(),
                         "%s_ps_gene_sets" % str(block.uuid))
    # Key each gene set by its position in the search result.
    result = {key: value for key, value in enumerate(comodule_set)}
    gs = GS(result, result)
    gene_sets.store_gs(gs)

    # self.set_out_var("gene_sets", gene_sets)
    # result = {key: value for key, value in enumerate(comodule_set)}
    # cs.store_set(result)
    # exp.log(block.uuid, "ComoduleSet stored.")

    return [gene_sets], {}
示例#11
0
 def get_gene_sets(self):
     """Build a GeneSets wrapper around the GMT file of this record.

     ``GeneSets`` is constructed with ``None`` for both arguments, then
     given a ``GmtStorage`` on ``self.gmt_file.path`` as storage and
     ``self.unit`` as its ``gene_units`` metadata.
     """
     wrapper = GeneSets(None, None)
     backing_store = GmtStorage(self.gmt_file.path)
     wrapper.storage = backing_store
     wrapper.metadata["gene_units"] = self.unit
     return wrapper