示例#1
0
def get_gene_sets(gene_set_names: list, organism: str = ORGANISM, go_slims: bool = False,
                  set_sizes: tuple = (-np.inf, np.inf), reference: set = None) -> dict:
    """
    Load the gene sets of the requested ontologies and filter them by size and, optionally, by GO slims.

    :param gene_set_names: Names of ontologies for which to get gene sets (as returned by list_gene_sets).
    :param organism: Organism id; it is converted to str before being passed to load_gene_sets.
    :param go_slims: If True, gene sets of ontologies whose name starts with 'GO' (first element of the
        ontology name) are restricted to those whose gs_id is in the 'goslim_generic' slims subset.
    :param set_sizes: Keep only gene sets whose size is greater or equal to the 1st element and smaller or
        equal to the 2nd element of set_sizes. Default is (-inf, inf), i.e. no size filtering.
    :param reference: Collection (set, list, ...) of gene EIDs to use as a reference set. When given, the size
        of a gene set is the number of its genes that are also present in reference. None disables this.
    :return: Dict mapping each ontology name to the list of its retained GeneSet objects.
    """
    if reference is not None:
        # The intersection below needs a real set; accept any iterable of gene IDs (e.g. a list)
        # and convert it once instead of failing with a TypeError on ``set & list``.
        reference = set(reference)

    slims = None
    if go_slims:
        anno = go.Annotations(organism)
        anno._ensure_ontology()
        anno._ontology.set_slims_subset('goslim_generic')
        slims = anno._ontology.slims_subset

    min_size, max_size = set_sizes[0], set_sizes[1]
    gene_set_ontology = dict()
    for gene_set_name in gene_set_names:
        # Slim filtering applies only to GO ontologies (ontology type is the first element of the name).
        only_slims = go_slims and gene_set_name[0] == 'GO'
        retained = []
        for gene_set in load_gene_sets(gene_set_name, str(organism)):
            genes = gene_set.genes if reference is None else gene_set.genes & reference
            if not min_size <= len(genes) <= max_size:
                continue
            if only_slims and gene_set.gs_id not in slims:
                continue
            retained.append(gene_set)
        gene_set_ontology[gene_set_name] = retained
    return gene_set_ontology
示例#2
0
def go_gene_sets(tax_id: str) -> None:
    """
    Build GO gene sets for an organism from local files and write them out in GMT format.

    The ontology is read from ``{data_path}/go/gene_ontology.obo`` and the annotations from
    ``{data_path}/go/{tax_id}.tab``. Every term with at least one annotated gene becomes a GeneSet;
    the sets are then split by hierarchy and each group is written to ``{data_path}/gene_sets/``.

    :param tax_id: Taxonomy id of the organism.
    """
    domain = 'go'
    go_ontology = go.Ontology(filename=f'{data_path}/{domain}/gene_ontology.obo')
    go_annotations = go.Annotations(tax_id,
                                    filename=f'{data_path}/{domain}/{tax_id}.tab',
                                    ontology=go_ontology)

    non_empty_sets = []
    for term in go_ontology.terms.values():
        term_genes = go_annotations.get_genes_by_go_term(term.id)
        if len(term_genes) == 0:
            # Terms without any annotated gene do not produce a gene set.
            continue
        non_empty_sets.append(GeneSet(
            gs_id=term.id,
            name=term.name,
            genes=set(term_genes),
            hierarchy=('GO', term.namespace),
            organism=tax_id,
            link=f'http://amigo.geneontology.org/amigo/term/{term.id}'))

    # One output file per hierarchy group; the file name is derived from the hierarchy and organism.
    for group in GeneSets(non_empty_sets).split_by_hierarchy():
        group_hierarchy = group.common_hierarchy()
        target = f'{data_path}/gene_sets/{filename(group_hierarchy, tax_id)}'
        group.to_gmt_file_format(target)
def go_gene_sets(org):
    """ Returns gene sets from GO.

    One GeneSet is created for every ontology term that has at least one
    annotated gene for the organism ``org``; the result is wrapped in GeneSets.
    """
    go_ontology = go.Ontology()
    go_annotations = go.Annotations(org, ontology=go_ontology)

    collected = []
    for term_id, term in go_ontology.terms.items():
        annotated_genes = go_annotations.get_genes_by_go_term(term_id)
        hierarchy = ('GO', term.namespace)
        if len(annotated_genes) == 0:
            # Skip terms with no annotated genes.
            continue
        collected.append(
            GeneSet(gs_id=term_id, name=term.name, genes=annotated_genes,
                    hierarchy=hierarchy, organism=org,
                    link=GO_TERM_LINK.format(term_id)))

    return GeneSets(collected)
    def Load(self):
        """Ensure the ontology and the annotations of the selected organism are loaded.

        The ontology is created on first use. Annotations are reloaded only when
        the selected entry's taxid differs from the one already loaded; after a
        reload, each evidence check box is enabled or disabled and relabelled
        with the number of annotations and distinct genes for its evidence type.
        """
        selected = self.available_annotations[self.annotation_index]

        if self.ontology is None:
            self.ontology = go.Ontology()

        if selected.taxid != self.loaded_annotation_code:
            # Drop the old annotations and collect garbage before loading the new ones.
            self.annotations = None
            gc.collect()
            self.annotations = go.Annotations(selected.taxid)
            self.loaded_annotation_code = selected.taxid

            # Per evidence type: number of annotations and the set of annotated genes.
            annotation_counts = defaultdict(int)
            genes_by_evidence = defaultdict(set)
            for annotation in self.annotations.annotations:
                evidence = annotation.evidence
                annotation_counts[evidence] += 1
                genes_by_evidence[evidence].add(annotation.gene_id)

            for evidence_type in go.evidenceTypesOrdered:
                check_box = self.evidenceCheckBoxDict[evidence_type]
                n_annotations = annotation_counts[evidence_type]
                n_genes = len(genes_by_evidence[evidence_type])
                # Evidence types without any annotation cannot be selected.
                check_box.setEnabled(bool(n_annotations))
                check_box.setText(evidence_type + ": %i annots(%i genes)" % (n_annotations, n_genes))
示例#5
0
def GO_enrichment(entrez_ids: list, organism: int = ORGANISM, fdr=0.25, slims: bool = True,
                  aspect: str = None) -> OrderedDict:
    """
    Calculate ontology enrichment for a list of genes.

    :param entrez_ids: Entrez IDs of the gene group to be analysed for enrichment.
    :param organism: Organism ID used to load the annotations.
    :param fdr: Threshold passed to go.filter_by_p_value for retention of enriched gene sets.
    :param slims: Passed to get_enriched_terms as slims_only (from Orange Annotations).
    :param aspect: Which GO aspect to use. From Orange Annotations: None: all, 'Process', 'Function',
        'Component'.
    :return: OrderedDict with ontology term as key and FDR as value (the second element of each
        enrichment result), sorted by value ascendingly.
    """
    annotations = go.Annotations(organism)
    significant = go.filter_by_p_value(
        annotations.get_enriched_terms(entrez_ids, slims_only=slims, aspect=aspect), fdr)

    value_by_term = {}
    for go_id, result in significant.items():
        # Use the first annotation of this GO id to obtain the term name; ids without
        # such an annotation are left out of the result.
        match = next((annotation for annotation in annotations.get_annotations_by_go_id(go_id)
                      if annotation.go_id == go_id), None)
        if match is not None:
            value_by_term[match.go_term] = result[1]

    ranked = sorted(value_by_term.items(), key=lambda item: item[1])
    return OrderedDict(ranked)
from orangecontrib.bioinformatics import go

# Load the ontology and the annotations for organism "4932".
ontology = go.Ontology()
annotations = go.Annotations("4932", ontology=ontology)

# keys are symbol names, values are Entrez IDs
genes_ids = {'Yta7p': '853186', 'RPN2': '854735', 'RPT2': '851557'}

# Enrichment over all terms.
res = annotations.get_enriched_terms(genes_ids.values())

print(res)
print("Enriched terms:")
for go_id, (genes, p_value, ref) in res.items():
    if not p_value < 0.05:
        continue
    term_name = ontology[go_id].name
    gene_list = ", ".join(genes)
    print(term_name + " with p-value: %.4f " % p_value + gene_list)

# And again for slims
annotations.ontology.set_slims_subset('goslim_yeast')

res = annotations.get_enriched_terms(genes_ids.values(), slims_only=True)
print("\n\nEnriched slim terms:")
for go_id, (genes, p_value, _) in res.items():
    if not p_value < 0.2:
        continue
    term_name = ontology[go_id].name
    gene_list = ", ".join(genes)
    print(term_name + " with p-value: %.4f " % p_value + gene_list)