Example #1
 def __init__(self, context):
     super(GO, self).__init__("go", context)
     ofactory = OntologyFactory()
     try:
         #sometimes the ontology world is down :(
         self.ont = ofactory.create('go')
     except:
         self.ont = ofactory.create('obo:go')
     #self.ont = ofactory.create('./mondo.owl')
     #This seems to be required to make the ontology actually load:
     _ = self.ont.get_level(0)
 def load_associations(self, taxon) -> None:
     taxon_map = {
         'human': 'NCBITaxon:9606',
         'mouse': 'NCBITaxon:10090',
     }
     ofactory = OntologyFactory()
     self.ontology = ofactory.create(self.ont)
     p = GafParser()
     url = ''
     if self.ont == 'go':
         # CX: GO:0008150 is biological_process, GO:0003674 is molecular_function. 
         # CX: These are 2 of the 3 top-level terms in the GO ontology.
         # CX: The excluded term is cellular_component (the cellular location where a gene product is active)
         go_roots = set(self.ontology.descendants('GO:0008150') + self.ontology.descendants('GO:0003674'))
         sub_ont = self.ontology.subontology(go_roots)
         if taxon == 'mouse':
             url = "http://current.geneontology.org/annotations/mgi.gaf.gz"
         if taxon == 'human':
             url = "http://current.geneontology.org/annotations/goa_human.gaf.gz"
         assocs = p.parse(url)
         self.assocs = assocs
         assocs = [x for x in assocs if 'header' not in x.keys()]
         assocs = [x for x in assocs if x['object']['id'] in go_roots]
         self.associations = self.afactory.create_from_assocs(assocs, ontology=sub_ont)
     else:
         self.associations = \
             self.afactory.create(
                     ontology=self.ontology,
                     subject_category='gene',
                     object_category='phenotype',
                     taxon=taxon_map[taxon]
         )
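
The try/except pattern in __init__ above (try the 'go' handle first, fall back to 'obo:go' when the SPARQL endpoint is down) recurs in Examples #8 and #10 below. A minimal sketch of the same idea as a reusable helper; the handle names are taken from the examples, the helper itself is illustrative:

from ontobio.ontol_factory import OntologyFactory

def create_with_fallback(primary, fallback):
    """Try a primary handle (e.g. 'go'); fall back to a second one (e.g. 'obo:go')."""
    ofactory = OntologyFactory()
    try:
        # sometimes the ontology world is down :(
        ont = ofactory.create(primary)
    except Exception:
        ont = ofactory.create(fallback)
    # the examples call this to force the ontology to actually load
    _ = ont.get_level(0)
    return ont

# go_ont = create_with_fallback('go', 'obo:go')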
Example #3
def run_phenolog(ont, aset, args):
    """
    Like run_enrichment_test, but uses classes from a 2nd ontology/assocset to build the gene set.
    """
    ofactory = OntologyFactory()
    ont2 = ofactory.create(args.resource2)

    afactory = AssociationSetFactory()
    aset2 = afactory.create(ontology=ont2, file=args.file2)

    # only test for genes (or other subjects of statements) in common
    common = set(aset.subjects).intersection(aset2.subjects)
    num_common = len(common)
    logging.info("Genes in common between two KBs: {}/\{} = {}".format(
        len(aset.subjects), len(aset2.subjects), num_common))
    if num_common < 2:
        logging.error("TOO FEW")
        return None
    for n in aset.ontology.nodes():
        nl = ont.label(n, id_if_null=True)
        genes = aset.query([n])
        num_genes = len(genes)
        if num_genes > 2:
            logging.info("BASE: {} {} num={}".format(n, nl, num_genes))
            enr = aset2.enrichment_test(subjects=genes,
                                        background=aset2.subjects,
                                        labels=True)
            for r in enr:
                print("{:8.3g} {} {:20s} <-> {} {:20s}".format(
                    r['p'], n, nl, r['c'], str(r['n'])))
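
run_phenolog takes the primary ontology and association set plus an args object carrying resource2 (the handle for the second ontology) and file2 (the path to the second GAF); Example #29 below wires these up via argparse. A hypothetical invocation sketch, with made-up handles and file names:

from argparse import Namespace
from ontobio.ontol_factory import OntologyFactory
from ontobio.assoc_factory import AssociationSetFactory

ont = OntologyFactory().create('hp')                        # hypothetical primary ontology
aset = AssociationSetFactory().create_from_file(
    file='gene_phenotype.hp.gaf', fmt='gaf', ontology=ont)  # hypothetical GAF path
args = Namespace(resource2='mp', file2='gene_phenotype.mp.gaf')
run_phenolog(ont, aset, args)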
Example #4
 def retrieve_associations(self, ont, group):
     taxon_map = {
         'human': 'NCBITaxon:9606',
         'mouse': 'NCBITaxon:10090',
     }
     ofactory = OntologyFactory()
     self.ontology = ofactory.create(ont)
     p = GafParser()
     url = ''
     if ont == 'go':
         go_roots = set(
             self.ontology.descendants('GO:0008150') +
             self.ontology.descendants('GO:0003674'))
         sub_ont = self.ontology.subontology(go_roots)
         if group == 'mouse':
             url = "http://current.geneontology.org/annotations/mgi.gaf.gz"
         if group == 'human':
             url = "http://current.geneontology.org/annotations/goa_human.gaf.gz"
         assocs = p.parse(url)
         #assocs = p.parse('goa_human.gaf.gz')
         self.assocs = assocs
         assocs = [x for x in assocs if 'header' not in x.keys()]
         assocs = [x for x in assocs if x['object']['id'] in go_roots]
         self.associations = self.afactory.create_from_assocs(
             assocs, ontology=sub_ont)
     else:
         self.associations = self.afactory.create(
             ontology=self.ontology,
             subject_category='gene',
             object_category='phenotype',
             taxon=taxon_map[group])
Example #5
def test_remote_sparql():
    """
    reconstitution test
    """
    factory = OntologyFactory()
    # default method is sparql
    ont = factory.create('pato')
    g = ont.get_graph()
    info = g.node[PLOIDY]
    print(str(info))
    nodes = g.nodes()
    print(len(nodes))
    assert len(nodes) > 100
    nbrs = g.successors(PLOIDY)
    print("SUCC:" + str(nbrs))
    parents = g.predecessors(PLOIDY)
    print("PRED:" + str(parents))
    assert parents == ['PATO:0001396']
    ancs = ancestors(g, PLOIDY)
    print("ANCS:" + str(ancs))
    assert 'PATO:0000001' in ancs
    print(g)
    Q = ['.*shape.*']
    w = GraphRenderer.create('tree')

    shapes1 = ont.resolve_names(Q, is_regex=True, is_remote=False)
    print("SHAPE Q:" + str(shapes1))
    show_nodes(w, ont, shapes1)
    assert Y_SHAPED in shapes1

    shapes2 = ont.resolve_names(Q, is_regex=True, is_remote=True)
    print("SHAPE Q:" + str(shapes2))
    show_nodes(w, ont, shapes2)
    assert Y_SHAPED in shapes2
Example #6
    def get(self, ontology, node):
        """
        Extract a subgraph from an ontology
        """
        args = parser.parse_args()

        ids = [node]
        if args.cnode is not None:
            ids += args.cnode

        factory = OntologyFactory()
        ont = factory.create(ontology)
        g = ont.get_filtered_graph(relations=args.relation)

        nodes = set()

        dirn = 'du'
        for id in ids:
            nodes.add(id)
            # NOTE: we use direct networkx methods as we have already extracted
            # the subgraph we want
            if dirn.find("u") > -1:
                nodes.update(nx.ancestors(g, id))
            if dirn.find("d") > -1:
                nodes.update(nx.descendants(g, id))
        subg = g.subgraph(nodes)
        ojr = OboJsonGraphRenderer()
        json_obj = ojr.to_json(subg)
        return json_obj
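
The direction string 'du' above means both ancestors ('u') and descendants ('d') of each seed node end up in the subgraph, collected with plain networkx calls on the filtered graph. A standalone sketch of the same traversal under assumed inputs (the 'pato' handle and the seed ID are borrowed from other examples in this listing, not part of this endpoint):

import networkx as nx
from ontobio.ontol_factory import OntologyFactory
from ontobio.io.ontol_renderers import OboJsonGraphRenderer

ont = OntologyFactory().create('pato')          # any ontology handle would do
g = ont.get_filtered_graph(relations=None)      # no relation filter
seed = 'PATO:0001396'                           # seed node, as seen in Example #5
nodes = {seed} | nx.ancestors(g, seed) | nx.descendants(g, seed)
json_obj = OboJsonGraphRenderer().to_json(g.subgraph(nodes))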
Example #7
def test_local_json_parse():
    """
    Load ontology from JSON
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/pato.json')

    ploidy = ont.node(PLOIDY)
    print("PLOIDY: {}".format(ploidy))
    assert ont.label(PLOIDY) == 'ploidy'

    # exact match
    search_results = ont.search('shape')
    print("SEARCH (exact): {}".format(search_results))
    assert [SHAPE] == search_results

    # implicit regexp
    search_results = ont.search('%shape%')
    print("SEARCH (re, implicit): {}".format(search_results))
    assert SHAPE in search_results
    assert len(search_results) > 10

    # explicit regexp
    search_results = ont.search('.*shape.*', is_regex=True)
    print("SEARCH (re, explicit): {}".format(search_results))
    assert SHAPE in search_results
    assert len(search_results) > 10

    num_nodes = 0
    for n in ont.nodes():
        num_nodes = num_nodes + 1
    assert num_nodes > 100
Example #8
 def __init__(self, context ):
     super(HPO, self).__init__("hpo", context)
     ofactory = OntologyFactory()
     try:
         #sometimes the ontology world is down :(
         self.ont = ofactory.create('hp')
     except:
         logger.warn('Problem reaching sparql endpoint, falling back to obo')
         try:
             self.ont = ofactory.create('obo:hp')
         except:
             logger.error('Problem reaching obo, add local owl file')
             import sys
             sys.exit(1)
             #self.ont = ofactory.create('onto_cache/hpo.owl')
     #This seems to be required to make the ontology actually load:
     _ = self.ont.get_level(0)
Example #9
 def get(self):
     """
     Extract a subgraph from an ontology
     """
     factory = OntologyFactory()
     ont = factory.create()
     z = get_db()
     return {'z': z, 'test': len(ont.nodes())}
Example #10
 def __init__(self, context):
     super(Mondo, self).__init__("mondo", context)
     ofactory = OntologyFactory()
     try:
         #sometimes the ontology world is down :(
         self.ont = ofactory.create('mondo')
     except:
         logger.warn(
             'Problem reaching sparql endpoint, falling back to obo')
         try:
             self.ont = ofactory.create('obo:mondo')
         except:
             logger.warn(
                 'Problem reaching obo, falling back to local owl file')
             self.ont = ofactory.create('onto_cache/mondo.owl')
     #self.ont = ofactory.create('./mondo.owl')
     #This seems to be required to make the ontology actually load:
     _ = self.ont.get_level(0)
Example #11
def test_write():
    """
    write obo from json
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/nucleus.json')
    w = GraphRenderer.create('obo')
    w.write(ont)
Example #12
 def make_apo_map():
     # load apo for term mapping
     ofactory = OntologyFactory()
     apo_ont = ofactory.create("apo")
     apo_nodes = apo_ont.nodes()
     # dict schema { 'term': 'apo_id' }
     apo_term_id = dict()
     for node in apo_nodes:
         label = apo_ont.label(node)
         apo_term_id[label] = node
     return apo_term_id
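
The dict returned by make_apo_map keys on the APO term label and maps it to the node ID, so lookups go label -> ID. A short, hypothetical usage note:

apo_term_id = make_apo_map()
# look up an APO ID by its label (the label string here is hypothetical)
node_id = apo_term_id.get('some APO term label')
# invert the map if ID -> label lookups are needed instead
apo_id_to_label = {v: k for k, v in apo_term_id.items()}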
Example #14
def test_align():
    """
    Text lexical mapping
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont1 = factory.create('ma')
    ont2 = factory.create('zfa')
    lexmap = LexicalMapEngine()

    lexmap.index_ontology(ont1)
    lexmap.index_ontology(ont2)

    print(lexmap.lmap)
    print(ont1.all_synonyms())
    print(ont2.all_synonyms())
    g = lexmap.get_xref_graph()
    for x, y, d in g.edges_iter(data=True):
        print("{}<->{} :: {}".format(x, y, d))
    for x in g.nodes():
        print("{} --> {}".format(x, lexmap.grouped_mappings(x)))
Example #15
def test_gaf():
    """
    Test loading from gaf
    """
    ofactory = OntologyFactory()
    afactory = AssociationSetFactory()
    ont = ofactory.create('go')
    aset = afactory.create_from_gaf(open(POMBASE, "r"), ontology=ont)
    print(str(aset))
    genes = aset.query([INTRACELLULAR])
    for g in genes:
        print("G={} '{}'".format(g, aset.label(g)))
    assert G1 in genes
Example #16
def test_alt_id():
    """
    test alt_ids and replaced by
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/alt_id_test.json')

    for x in ont.nodes():
        if ont.is_obsolete(x):
            if ont.replaced_by(x):
                print('{} --> {}'.format(x, ont.replaced_by(x)))
            else:
                print('OBS: {} no replacement'.format(x))
Example #17
def test_remote_disease():
    """
    factory test
    """
    ofactory = OntologyFactory()
    afactory = AssociationSetFactory()
    ont = ofactory.create('doid')
    aset = afactory.create(ontology=ont,
                           subject_category='disease',
                           object_category='phenotype',
                           taxon=HUMAN)

    rs = aset.query_associations([PD])
    print("Gene Assocs to PD: {} {}".format(rs, len(rs)))
Example #18
def get_ontology(id):
    handle = id
    for c in cfg['ontologies']:
        if c['id'] == id:
            logging.info("getting handle for id: {} from cfg".format(id))
            handle = c['handle']

    if handle not in omap:
        logging.info("Creating a new ontology object for {}".format(handle))
        ofa = OntologyFactory()
        omap[handle] = ofa.create(handle)
    else:
        logging.info("Using cached for {}".format(handle))
    return omap[handle]
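
get_ontology relies on two module-level globals that are not shown: a cfg dict with an 'ontologies' list of {'id', 'handle'} entries, and an omap dict used as a cache. A sketch of what that assumed state might look like (the entries themselves are hypothetical):

# assumed module-level state, inferred from the function body above
omap = {}   # handle -> ontology object cache
cfg = {
    'ontologies': [
        {'id': 'go', 'handle': 'go'},                          # hypothetical entry
        {'id': 'pato', 'handle': 'tests/resources/pato.json'}, # hypothetical entry
    ]
}

# ont = get_ontology('go')   # first call builds the ontology; later calls reuse omap['go']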
Example #19
def test_subontology():
    """
    Load extracting subontology
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/go-truncated-pombase.json')
    print("ONT NODES: {}".format(ont.nodes()))
    subont = ont.subontology(relations=['subClassOf'])
    PERM = 'GO:1990578'
    print("NODES: {}".format(subont.nodes()))
    ancs = subont.ancestors(PERM)
    print(str(ancs))
    assert len(ancs) > 0
Example #20
def test_local_json_parse():
    """
    Load ontology from JSON
    """
    factory = OntologyFactory()
    print("Creating ont")
    #tbox_ontology = factory.create('go')
    # TODO: create test module for this example
    tbox_ontology = factory.create('tests/resources/go-truncated-pombase.json')
    ont = factory.create('tests/resources/gocam-example.ttl', tbox_ontology=tbox_ontology)
    g = ont.get_graph()
    nodes = ont.search('%')
    print("NODES: {}".format(nodes))
    w = GraphRenderer.create(None)
    w.write_subgraph(ont, nodes)
    i = 'http://model.geneontology.org/0000000300000001/0000000300000007'
    ni = g[i]
    print(str(ni))
    assert ni['types'] == ['GO:0060070']
    nbrs = ont.neighbors(i)
    print("NEIGHBORS: {}".format(nbrs))
    subont = tbox_ontology.subontology(nodes, minimal=False)
    w = GraphRenderer.create('obo')
    print(w.render(subont))
Example #21
    def load_associations(self, ontology_name:str=None, subject_category:str=None,
                          object_category:str=None, evidence=None, taxon:str=None,
                          relation=None, file:Union[str, TextIO]=None, fmt:str=None,
                          skim:bool=False) -> None:
        ofactory = OntologyFactory()
        afactory = AssociationSetFactory()

        ontology = ofactory.create(ontology_name, subject_category)

        self.associations = afactory.create(
            ontology=ontology,
            subject_category=subject_category,
            object_category=object_category,
            evidence=evidence,
            taxon=taxon,
            relation=relation,
            file=file,
            fmt=fmt,
            skim=skim
        )
Example #22
def initOntologies():
    ofactory = OntologyFactory()
    print("Loading Ontologies...")
    ontologies['go'] = ofactory.create('go')
    ontologies['bfo'] = ofactory.create('bfo')
    ontologies['ro'] = ofactory.create('ro')
    ontologies['cl'] = ofactory.create('cl')
    ontologies['zfa'] = ofactory.create('zfa')
    ontologies['uberon'] = ofactory.create('uberon')
    ontologies['emapa'] = ofactory.create('emapa')
    #ontologies['chebi'] = ofactory.create('chebi')
    print("Done.")
Example #23
def test_obsolete():
    """
    Test obsoletion metadata
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/obsolete.json')
    print("ONT NODES: {}".format(ont.nodes()))
    n_obs = 0
    for nid in ont.nodes():
        is_obs = ont.is_obsolete(nid)
        if is_obs:
            print("OBS: {} {}".format(nid, ont.label(nid)))
            n_obs += 1
        rb = ont.replaced_by(nid)
        if rb is not None:
            print("REPLACED BY: {} {}".format(rb, ont.label(rb)))
    assert ont.replaced_by('GO:2') == ['GO:1']
    assert ont.replaced_by('GO:3') == ['GO:1']
    assert n_obs == 3
Example #24
def test_remote_go():
    """
    factory test
    """
    ofactory = OntologyFactory()
    afactory = AssociationSetFactory()
    ont = ofactory.create('go').subontology(relations=['subClassOf', PART_OF])
    aset = afactory.create(ontology=ont,
                           subject_category='gene',
                           object_category='function',
                           taxon=MOUSE)

    rs = aset.query([TRANSCRIPTION_FACTOR], [])
    print("Mouse genes annotated to TF: {} {}".format(rs, len(rs)))
    for g in rs:
        print("  Gene: {} {}".format(g, aset.label(g)))
    set_tf = rs

    rs = aset.query([NUCLEUS], [])
    print("Mouse genes annotated to nucleus: {} {}".format(rs, len(rs)))
    set_nucleus = rs
    assert (len(rs) > 100)

    rs = aset.query([TRANSCRIPTION_FACTOR, NUCLEUS], [])
    print("Mouse TF genes annotated to nucleus: {} {}".format(rs, len(rs)))
    assert (len(rs) > 100)
    set_nucleus_tf = rs
    assert (len(rs) < len(set_nucleus))

    rs = aset.query([NUCLEUS], [TRANSCRIPTION_FACTOR])
    print("Mouse non-TF genes annotated to nucleus: {} {}".format(rs, len(rs)))
    assert (len(rs) > 100)
    set_nucleus_non_tf = rs
    assert (len(rs) < len(set_nucleus))
    assert (len(set_nucleus_tf) + len(set_nucleus_non_tf) == len(set_nucleus))

    enr = aset.enrichment_test(subjects=set_tf, labels=True)
    print("ENRICHMENT (tf): {}".format(enr))
    [match] = [x for x in enr if x['c'] == NUCLEUS]
    print("ENRICHMENT (tf) for NUCLEUS: {}".format(match))
    assert match['p'] < 0.00001
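
The enrichment results iterated over here (and in Example #3) are plain dicts; the keys these examples rely on are 'p' (the p-value), 'c' (the enriched class ID) and, when labels=True, 'n' (its label). A small, hypothetical post-processing sketch:

threshold = 0.05   # hypothetical cutoff
significant = [r for r in enr if r['p'] < threshold]
for r in sorted(significant, key=lambda r: r['p']):
    print("{:8.3g} {} {}".format(r['p'], r['c'], r['n']))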
Example #25
def todo_subontology():
    """
    subontology
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('scigraph:ontology')
    print("ONT NODES: {}".format(ont.nodes()))

    # TODO: implement this for SciGraph:
    subont = ont.subontology(relations=['subClassOf'])
    PERM = 'GO:1990578'
    print("NODES: {}".format(subont.nodes()))
    ancs = subont.ancestors(PERM, reflexive=True)
    print(str(ancs))
    for a in ancs:
        print(" ANC: {} '{}'".format(a,subont.label(a)))
    assert len(ancs) > 0
    from ontobio.io.ontol_renderers import GraphRenderer
    w = GraphRenderer.create('tree')
    w.write_subgraph(ont, ancs)
Example #26
def test_enrichment():
    """
    enrichment
    """
    factory = OntologyFactory()
    ont = factory.create('pato')

    # gene set 'a' is biased to ploidy
    termprobs = [(QUALITY, 0.8, 0.8), (PLOIDY, 0.8, 0.2), (EUPLOID, 0.7, 0.01),
                 (SHAPE, 0.2, 0.75), (Y_SHAPED, 0.01, 0.5)]
    amap = {}
    geneset_a = []
    geneset_b = []
    for x in range(1, 100):
        for y in ['a', 'b']:
            dts = []
            for (t, p1, p2) in termprobs:
                if y == 'a':
                    p = p1
                else:
                    p = p2
                if random.random() < p:
                    dts.append(t)
            g = y + str(x)
            if y == 'a':
                geneset_a.append(g)
            else:
                geneset_b.append(g)
            amap[g] = dts
    logging.info(str(amap))
    aset = AssociationSet(ontology=ont, association_map=amap)
    logging.info(str(aset))
    print(str(geneset_a))
    results = aset.enrichment_test(geneset_a, labels=True)
    print(str(results))
    print("EXPECTED: {} {}".format(PLOIDY, EUPLOID))
    results = aset.enrichment_test(geneset_b, labels=True)
    print(str(results))
    print("EXPECTED: {} {}".format(SHAPE, Y_SHAPED))
Example #27
def test_factory():
    """
    test ontology factory using wikidata as source and using PTSD.

    """
    f = OntologyFactory()
    ont = f.create('wdq:Q544006')
    for n in ont.nodes():
        print('{} "{}"'.format(n, ont.label(n)))
    qids = ont.search('anxiety%')
    assert len(qids) > 0
    print(qids)
    nodes = ont.traverse_nodes(qids, up=True, down=True)
    print(nodes)
    assert len(nodes) > 0
    labels = [ont.label(n) for n in nodes]
    print(labels)
    # Note: it's possible wd may change rendering this false
    assert 'Fear of frogs' in labels
    from ontobio.io.ontol_renderers import GraphRenderer
    w = GraphRenderer.create('tree')
    w.write(ont, query_ids=qids)
Example #28
def main():
    """
    Wrapper for Assoc Parsing
    """
    parser = argparse.ArgumentParser(
        description='Wrapper for obographs assocmodel library.\n'
                    'By default, ontologies and assocs are cached locally '
                    'and synced from a remote sparql endpoint',
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-r',
                        '--resource',
                        type=str,
                        required=False,
                        help='Name of ontology')
    parser.add_argument(
        '-f',
        '--file',
        type=str,
        required=False,
        help='Name of input file for associations - currently GAF is assumed')
    parser.add_argument('-F',
                        '--format',
                        type=str,
                        required=False,
                        help='Format of assoc file. One of GAF, GPAD or HPOA')
    parser.add_argument('-o',
                        '--outfile',
                        type=str,
                        required=False,
                        help='Path to output file')
    parser.add_argument('-m',
                        '--messagefile',
                        type=str,
                        required=False,
                        help='Path to messages (report) markdown file')
    parser.add_argument('-t',
                        '--to',
                        type=str,
                        required=False,
                        help='Output to (tree, dot, ...)')
    parser.add_argument(
        "--filter-out",
        nargs="+",
        required=False,
        default=[],
        metavar="EVIDENCE",
        help=
        "List of any evidence codes to filter out of the GAF. E.G. --filter-out IEA IMP"
    )
    parser.add_argument("--filtered-file",
                        required=False,
                        default=None,
                        metavar="FILTERED_FILE",
                        help="File to write the filtered out evidence GAF to")
    parser.add_argument(
        '-T',
        '--taxon',
        nargs='*',
        required=False,
        help='valid taxon (NCBITaxon ID) - validate against this')
    parser.add_argument('--subject_prefix',
                        nargs='*',
                        required=False,
                        help='E.g PomBase - validate against this')
    parser.add_argument('--object_prefix',
                        nargs='*',
                        required=False,
                        help='E.g GO - validate against this')
    parser.add_argument('-v',
                        '--verbosity',
                        default=0,
                        action='count',
                        help='Increase output verbosity')

    subparsers = parser.add_subparsers(dest='subcommand',
                                       help='sub-command help')

    parser_n = subparsers.add_parser('validate', help='Validate associations')
    parser_n.set_defaults(function=validate_assocs)

    parser_n = subparsers.add_parser('filter', help='Filter associations')
    parser_n.set_defaults(function=filter_assocs)

    parser_n = subparsers.add_parser('convert', help='Convert associations')
    parser_n.set_defaults(function=convert_assocs)
    parser_n.add_argument('-t',
                          '--to',
                          type=str,
                          required=True,
                          help='Format to convert to')

    parser_n = subparsers.add_parser('map2slim', help='Map to a subset/slim')
    parser_n.set_defaults(function=map2slim)
    parser_n.add_argument('-p',
                          '--properties',
                          nargs='*',
                          type=str,
                          required=False,
                          help='Properties')
    parser_n.add_argument('-s',
                          '--subset',
                          type=str,
                          required=True,
                          help='subset (e.g. map2slim)')

    args = parser.parse_args()

    if args.verbosity >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbosity == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    logging.info("Welcome!")

    handle = args.resource

    # Ontology Factory
    ofactory = OntologyFactory()
    logging.info("Creating ont object from: {} {}".format(handle, ofactory))
    ont = ofactory.create(handle)
    logging.info("ont: {}".format(ont))

    func = args.function

    # Upper case all evidence codes
    args.filter_out = [code.upper() for code in args.filter_out]

    # set configuration
    filtered_evidence_file = open(args.filtered_file,
                                  "w") if args.filtered_file else None
    config = assocparser.AssocParserConfig(
        valid_taxa=args.taxon,
        ontology=ont,
        class_idspaces=args.object_prefix,
        entity_idspaces=args.subject_prefix,
        filter_out_evidence=args.filter_out,
        filtered_evidence_file=filtered_evidence_file)
    p = None
    fmt = None
    if args.format is None:
        fmt = 'gaf'
    else:
        fmt = args.format.lower()

    # TODO: use a factory
    if fmt == 'gaf':
        p = GafParser()
    elif fmt == 'gpad':
        p = GpadParser()
    elif fmt == 'hpoa':
        p = HpoaParser()
    elif fmt == "gpi":
        p = entityparser.GpiParser()
        func = validate_entity

    p.config = config

    outfh = None
    if args.outfile is not None:
        two_mb = 2097152
        outfh = open(args.outfile, "w", buffering=two_mb)
    func(ont, args.file, outfh, p, args)
    if filtered_evidence_file:
        filtered_evidence_file.close()

    if outfh is not None:
        outfh.close()
    if args.messagefile is not None:
        mfh = open(args.messagefile, "w")
        mfh.write(p.report.to_markdown())
        mfh.close()
    else:
        print(p.report.to_markdown())
Example #29
def main():
    """
    Wrapper for OGR Assocs
    """
    parser = argparse.ArgumentParser(
        description='Wrapper for obographs assocmodel library.\n'
                    'By default, ontologies and assocs are cached locally '
                    'and synced from a remote sparql endpoint',
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-r',
                        '--resource',
                        type=str,
                        required=False,
                        help='Name of ontology')
    parser.add_argument('-f',
                        '--assocfile',
                        type=str,
                        required=False,
                        help='Name of input file for associations')
    parser.add_argument(
        '--assocformat',
        type=str,
        default='gaf',
        required=False,
        help='Format of association file, if passed (default: gaf)')
    parser.add_argument('-o',
                        '--outfile',
                        type=str,
                        required=False,
                        help='Path to output file')
    parser.add_argument('-t',
                        '--to',
                        type=str,
                        required=False,
                        help='Output to (tree, dot, ...)')
    parser.add_argument('-d',
                        '--direction',
                        type=str,
                        default='u',
                        required=False,
                        help='u = up, d = down, ud = up and down')
    parser.add_argument('-e',
                        '--evidence',
                        type=str,
                        required=False,
                        help='ECO class')
    parser.add_argument('-p',
                        '--properties',
                        nargs='*',
                        type=str,
                        required=False,
                        help='Properties')
    parser.add_argument('-P',
                        '--plot',
                        type=bool,
                        default=False,
                        help='if set, plot output (requires plotly)')
    parser.add_argument('-y',
                        '--yamlconfig',
                        type=str,
                        required=False,
                        help='Path to setup/configuration yaml file')
    parser.add_argument('-S',
                        '--slim',
                        type=str,
                        default='',
                        required=False,
                        help='Slim type. m=minimal')
    parser.add_argument('-c',
                        '--container_properties',
                        nargs='*',
                        type=str,
                        required=False,
                        help='Properties to nest in graph')
    parser.add_argument('-C',
                        '--category',
                        nargs=2,
                        type=str,
                        required=False,
                        help='category tuple (SUBJECT OBJECT)')
    parser.add_argument('-T',
                        '--taxon',
                        type=str,
                        required=False,
                        help='Taxon of associations')
    parser.add_argument('-v',
                        '--verbosity',
                        default=0,
                        action='count',
                        help='Increase output verbosity')

    subparsers = parser.add_subparsers(dest='subcommand',
                                       help='sub-command help')

    # EXTRACT ONTOLOGY
    parser_n = subparsers.add_parser(
        'subontology',
        help=
        'Extract sub-ontology, include only annotated nodes or their descendants'
    )
    parser_n.add_argument('-M',
                          '--minimal',
                          dest='minimal',
                          action='store_true',
                          default=False,
                          help='If set, remove non-MRCA nodes')
    parser_n.set_defaults(function=extract_ontology)

    # ENRICHMENT
    parser_n = subparsers.add_parser(
        'enrichment',
        help=
        'Perform an enrichment test over a sample set of annotated entities')
    parser_n.add_argument(
        '-q',
        '--query',
        type=str,
        help='query all genes for this class and use as subject')
    parser_n.add_argument('-H',
                          '--hypotheses',
                          nargs='*',
                          help='list of classes to test against')
    parser_n.add_argument(
        '-s',
        '--sample_file',
        type=str,
        help='file containing list of gene IDs in sample set')
    parser_n.add_argument(
        '-b',
        '--background_file',
        type=str,
        help='file containing list of gene IDs in background set')
    parser_n.add_argument('-t',
                          '--threshold',
                          type=float,
                          help='p-value threshold')
    parser_n.add_argument('sample_ids',
                          nargs='*',
                          help='list of gene IDs in sample set')
    parser_n.set_defaults(function=run_enrichment_test)

    # PHENOLOG
    parser_n = subparsers.add_parser(
        'phenolog',
        help=
        'Perform multiple enrichment tests, using a second ontology and assoc set to build gene sets'
    )
    parser_n.add_argument('-R',
                          '--resource2',
                          type=str,
                          required=True,
                          help='handle for second ontology')
    parser_n.add_argument('-F',
                          '--file2',
                          type=str,
                          required=True,
                          help='path to second GAF')
    parser_n.set_defaults(function=run_phenolog)

    # QUERY
    parser_n = subparsers.add_parser(
        'query',
        help=
        'Query for entities (e.g. genes) based on positive and negative terms')
    parser_n.add_argument('-q', '--query', nargs='*', help='positive classes')
    parser_n.add_argument('-N',
                          '--negative',
                          type=str,
                          help='negative classes')
    parser_n.set_defaults(function=run_query)

    # QUERY ASSOCIATIONS
    parser_n = subparsers.add_parser(
        'associations',
        help='Query for associations for a set of entities (e.g. genes)')
    parser_n.add_argument('subjects', nargs='*', help='subject ids')
    parser_n.add_argument('-D', '--dendrogram', type=bool, default=False)
    parser_n.set_defaults(function=run_query_associations)

    # INTERSECTIONS
    parser_n = subparsers.add_parser('intersections',
                                     help='Query intersections')
    parser_n.add_argument('-X', '--xterms', nargs='*', help='x classes')
    parser_n.add_argument('-Y', '--yterms', nargs='*', help='y classes')
    parser_n.add_argument('--useids',
                          type=bool,
                          default=False,
                          help='if true, use IDs not labels on axes')
    parser_n.add_argument('terms', nargs='*', help='all terms (x and y)')
    parser_n.set_defaults(function=plot_intersections)

    # INTERSECTION DENDROGRAM (TODO: merge into previous?)
    parser_n = subparsers.add_parser('intersection-dendrogram',
                                     help='Plot dendrogram from intersections')
    parser_n.add_argument('-X', '--xterms', nargs='*', help='x classes')
    parser_n.add_argument('-Y', '--yterms', nargs='*', help='y classes')
    parser_n.add_argument('--useids',
                          type=bool,
                          default=False,
                          help='if true, use IDs not labels on axes')
    parser_n.add_argument('terms', nargs='*', help='all terms (x and y)')
    parser_n.set_defaults(function=plot_term_intersection_dendrogram)

    # SIMILARITY MATRIX (may move to another module)
    parser_n = subparsers.add_parser(
        'simmatrix', help='Plot dendrogram for similarities between subjects')
    parser_n.add_argument('-X', '--xsubjects', nargs='*', help='x subjects')
    parser_n.add_argument('-Y', '--ysubjects', nargs='*', help='y subjects')
    parser_n.add_argument('--useids',
                          type=bool,
                          default=False,
                          help='if true, use IDs not labels on axes')
    parser_n.add_argument('subjects', nargs='*', help='all terms (x and y)')
    parser_n.set_defaults(function=plot_simmatrix)

    args = parser.parse_args()

    if args.verbosity >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbosity == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    if not args.assocfile:
        if not args.taxon or not args.category:
            raise ValueError(
                "Must specify EITHER assocfile OR both taxon and category")

    logging.info("Welcome!")

    if args.yamlconfig is not None:
        logging.info("Setting config from: {}".format(args.yamlconfig))
        # note this sets a global:
        # we would not do this outside the context of a standalone script
        from ontobio.config import set_config
        set_config(args.yamlconfig)

    handle = args.resource

    # Ontology Factory
    ofactory = OntologyFactory()
    logging.info("Creating ont object from: {} {}".format(handle, ofactory))
    ont = ofactory.create(handle)
    logging.info("ont: {}".format(ont))

    evidence = args.evidence
    if evidence is not None and evidence.lower() == 'noiea':
        evidence = "-ECO:0000501"

    # Association Factory
    afactory = AssociationSetFactory()
    aset = None
    if args.assocfile is not None:
        aset = afactory.create_from_file(file=args.assocfile,
                                         fmt=args.assocformat,
                                         ontology=ont)
    else:
        [subject_category, object_category] = args.category
        # create using GO/Monarch services
        aset = afactory.create(ontology=ont,
                               subject_category=subject_category,
                               object_category=object_category,
                               taxon=args.taxon)

    func = args.function
    func(ont, aset, args)
Example #30
 def load_ontology(ont):
     print('loading ontology -- this can take a while')
     ofactory = OntologyFactory()
     return ofactory.create(ont)
Example #31
def test_graph():
    """
    Load ontology from JSON
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/nucleus.json')

    icp = ont.node(INTRACELLULAR_PART)
    print("ICP: {}".format(icp))
    assert ont.label(INTRACELLULAR_PART) == 'intracellular part'

    graph = ont.get_graph()
    print("GRAPH: {}".format(graph.nodes))

    ancs = ont.ancestors(NUCLEUS)
    print("ANCS nucleus (all): {}".format(ancs))
    assert CELL in ancs
    assert CELLULAR_COMPONENT in ancs
    assert INTRACELLULAR in ancs
    assert NUCLEUS not in ancs

    ancs = ont.ancestors(INTRACELLULAR_PART)
    print("ANCS intracellular part(all): {}".format(ancs))
    assert CELL in ancs
    assert CELLULAR_COMPONENT in ancs
    assert NUCLEUS not in ancs

    ancs = ont.ancestors(INTRACELLULAR_PART, relations=['subClassOf'])
    print("ANCS intracellular part(subclass): {}".format(ancs))
    assert CELLULAR_COMPONENT in ancs
    assert CELL not in ancs
    assert NUCLEUS not in ancs

    # note: queries over *only* part_of are a non-use case, as we
    # typically always include subClassOf, due to how these chain
    # together according to OWL semantics
    ancs = ont.ancestors(INTRACELLULAR_PART, relations=[PART_OF])
    print("ANCS intracellular part(part_of): {}".format(ancs))
    assert INTRACELLULAR in ancs
    assert CELL not in ancs
    assert NUCLEUS not in ancs

    ancs = ont.parents(INTRACELLULAR_PART)
    print("PARENTS intracellular (all): {}".format(ancs))
    assert INTRACELLULAR in ancs
    assert CELL_PART in ancs
    assert CELLULAR_COMPONENT not in ancs
    assert NUCLEUS not in ancs

    ancs = ont.parents(INTRACELLULAR_PART, relations=[PART_OF])
    print("PARENTS intracellular (part_of): {}".format(ancs))
    assert INTRACELLULAR in ancs
    assert CELL_PART not in ancs
    assert CELLULAR_COMPONENT not in ancs
    assert NUCLEUS not in ancs

    decs = ont.descendants(INTRACELLULAR_PART)
    print("DECS: {}".format(decs))
    assert NUCLEUS in decs
    assert CELL not in decs

    decs = ont.descendants(INTRACELLULAR, relations=[PART_OF])
    print("DECS: {}".format(decs))
    assert INTRACELLULAR_PART in decs
    assert NUCLEUS not in decs
    assert CELL not in decs

    decs = ont.children(INTRACELLULAR)
    print("CHILDREN (all): {}".format(decs))
    assert [INTRACELLULAR_PART] == decs

    decs = ont.children(CELL_PART)
    print("CHILDREN (all): {}".format(decs))
    assert INTRACELLULAR_PART in decs
    assert INTRACELLULAR in decs

    decs = ont.children(INTRACELLULAR, relations=[PART_OF])
    print("CHILDREN (po): {}".format(decs))
    assert INTRACELLULAR_PART in decs
    assert NUCLEUS not in decs
    assert CELL not in decs

    xrefs = ont.xrefs(CELL)
    print("XREFS (from GO): {}".format(xrefs))
    assert WIKIPEDIA_CELL in xrefs
    assert NIF_CELL in xrefs
    assert len(xrefs) == 2

    # xrefs are bidirectional
    xrefs = ont.xrefs(WIKIPEDIA_CELL, bidirectional=True)
    print("XREFS (from WP, bidi): {}".format(xrefs))
    assert CELL in xrefs
    assert len(xrefs) == 1

    # xrefs queries unidirectional by default
    xrefs = ont.xrefs(WIKIPEDIA_CELL)
    print("XREFS (from WP): {}".format(xrefs))
    assert len(xrefs) == 0

    tdef = ont.text_definition(NUCLEUS)
    print("TDEF: {}".format(tdef))
    assert tdef.xrefs == ["GOC:go_curators"]
    assert tdef.val.startswith(
        "A membrane-bounded organelle of eukaryotic cells in which")

    [ldef] = ont.logical_definitions(INTRACELLULAR_PART)
    print("LDEF: {}".format(ldef))
    assert ldef.genus_ids == [CELLULAR_COMPONENT]
    assert ldef.restrictions == [(PART_OF, INTRACELLULAR)]

    syns = ont.synonyms(CELL_PART, include_label=True)
    print("SYNS: {}".format(syns))
    [s1] = [x for x in syns if x.val == 'protoplast']
    assert s1.pred == 'hasRelatedSynonym'
    assert s1.xrefs == ['GOC:mah']

    GOSLIM = 'goslim_generic'
    subsets = ont.subsets(NUCLEUS)
    print("SUBSETS: {}".format(subsets))
    assert GOSLIM in subsets
    assert len(subsets) > 0

    in_slim = ont.extract_subset(GOSLIM)
    print("IN SLIM: {}".format(in_slim))
    assert len(in_slim) > 0
    assert NUCLEUS in in_slim

    #logging.basicConfig(level=logging.DEBUG)

    assert [] == ont.search('protoplast', synonyms=False)
    assert {CELL_PART,
            INTRACELLULAR} == set(ont.search('protoplast', synonyms=True))

    assert ont.has_node(CELL_PART)
    assert not ont.has_node('FOO:123')

    # ensure subontology retains properties
    decs = ont.descendants(CELL, reflexive=True)
    subont = ont.subontology(nodes=decs)

    syns = subont.synonyms(CELL_PART, include_label=True)
    print("SYNS: {}".format(syns))
    [s1] = [x for x in syns if x.val == 'protoplast']
    assert s1.pred == 'hasRelatedSynonym'
    assert s1.xrefs == ['GOC:mah']

    assert subont.parents(NUCLEUS) == [IMBO]

    from ontobio import GraphRenderer
    w = GraphRenderer.create('obo')
    w.write(subont, query_ids=[CELL, CELL_PART, NUCLEUS])
Example #32
import requests
import ontobio.sparql.wikidata as wd
from ontobio.ontol_factory import OntologyFactory
from ontobio.assoc_factory import AssociationSetFactory
from ontobio.io.ontol_renderers import GraphRenderer
from dipper.graph.RDFGraph import RDFGraph
from scigraph.api.SciGraph import SciGraph
import re
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Make ontology from wikidata

onto_factory = OntologyFactory()
wd_ontology = onto_factory.create('wdq:Q185034')  # Sickle cell anemia
qids = wd_ontology.search('Sickle%')

# Traverse up and down from query node in our sub-ontology
nodes = wd_ontology.traverse_nodes(qids, up=True, down=True)

renderer = GraphRenderer.create('obo')
renderer.outfile = './output/wd-ontology.obo'
# renderer.write(wd_ontology)
# >> AttributeError: 'EagerWikidataOntology' object has no attribute 'all_logical_definitions'

renderer.write_subgraph(wd_ontology, nodes, query_ids=qids)

# Get GO terms
outfile = open('./output/go-terms.tsv', 'w')