Exemplo n.º 1
0
def test_semsearch():
    """Exercise SemSearchEngine pairwise scoring on a filtered PomBase set.

    Parses the POMBASE GAF, keeps the associations whose subject label is in
    GENES, builds an association set over its own subontology, precomputes
    all MICAs and then scores every ordered pair of subjects.  A subject
    compared with itself must have a cosine score above 0.9999.
    """
    afa = AssociationSetFactory()
    ont = OntologyFactory().create(ONT)
    parser = GafParser()
    assocs = parser.parse(POMBASE, skipheader=True)
    assocs = [a for a in assocs if a['subject']['label'] in GENES]
    aset = afa.create_from_assocs(assocs, ontology=ont)

    # From here on both the local name and the association set use the
    # subontology returned by the association set itself.
    ont = aset.subontology()
    aset.ontology = ont
    logging.info('Genes={} Terms={}'.format(len(aset.subjects),
                                            len(ont.nodes())))

    print('STATS={}'.format(aset.as_dataframe().describe()))

    sse = SemSearchEngine(assocmodel=aset)

    logging.info('Calculating all MICAs')
    sse.calculate_all_micas()

    logging.info('Doing pairwise')
    for i in aset.subjects:
        for j in aset.subjects:
            sim = sse.pw_score_cosine(i, j)
            if i == j:
                assert (sim > 0.9999)
            tups = sse.pw_score_resnik_bestmatches(i, j)
            print('{} x {} = {} // {}'.format(i, j, sim, tups))
Exemplo n.º 2
0
def test_default_gaf_version():
    """Parsing the "no-version" fixture must leave ``p.version`` at "2.1"."""
    p = GafParser()

    # The parse is run only for its effect on ``p.version``; the context
    # manager closes the fixture even if parsing raises.
    with open("tests/resources/test-qualifiers-no-version.gaf") as gaf:
        p.parse(gaf, skipheader=True)

    assert p.version == "2.1"
Exemplo n.º 3
0
def test_qualifiers_gaf():
    """Check negation, relations and gorule-0000001 messages for the QGAF fixture."""
    p = GafParser()
    # Close the fixture deterministically, even when parsing raises.
    with open(QGAF, "r") as gaf:
        assocs = p.parse(gaf, skipheader=True)

    # Explicit comparison kept on purpose: only values equal to True count.
    neg_assocs = [a for a in assocs if a['negated'] == True]
    assert len(neg_assocs) == 3
    for a in assocs:
        print('REL: {}'.format(a['relation']))

    assert len([a for a in assocs
                if a['relation']['id'] == 'contributes_to']) == 1

    # Build the report once and reuse it for both checks.
    rule_messages = p.report.to_report_json()["messages"]["gorule-0000001"]

    # For the space in `colocalizes with`
    assert len(
        [m for m in rule_messages if m["obj"] == "colocalizes with"]) == 1
    assert len(
        [m for m in rule_messages if m["obj"] == "involved_in"]) == 1
Exemplo n.º 4
0
def test_errors_gaf():
    """Parse the errors.gaf fixture and check message and association counts.

    Also writes the surviving associations with GafWriter and verifies that
    any remaining object extension is the single ``foo(X:1)`` expression.
    """
    config = assocparser.AssocParserConfig(ecomap=EcoMap())
    p = GafParser(config=config)
    # Close the fixture deterministically, even when parsing raises.
    with open("tests/resources/errors.gaf", "r") as gaf:
        assocs = p.parse(gaf, skipheader=True)
    msgs = p.report.messages
    print(json.dumps(p.report.to_report_json(), indent=4))

    n_invalid_idspace = 0
    for m in msgs:
        print("MESSAGE: {}".format(m))
        if m['type'] == assocparser.Report.INVALID_IDSPACE:
            n_invalid_idspace += 1
    assert len(msgs) == 16
    assert n_invalid_idspace == 1
    assert len(assocs) == 5

    w = GafWriter()
    w.write(assocs)
    for a in assocs:
        if a['object_extensions'] != {}:
            # our test file has no ORs, so in DNF this is always the first
            xs = a['object_extensions']['union_of'][0]['intersection_of']
            for x in xs:
                print('X: {}'.format(x))
                # ensure that invalid expressions have been eliminated
                assert x['property'] == 'foo'
                assert x['filler'] == 'X:1'
            assert len(xs) == 1
Exemplo n.º 5
0
def test_no_flag_valid_id():
    """Validating GO:0000785 against the ONT ontology must add no report messages."""
    ontology = OntologyFactory().create(ONT)
    parser = GafParser()
    parser.config.ontology = ontology

    # Placeholder split line: 17 empty columns, only needed as call context.
    placeholder_line = assocparser.SplitLine("fake", [""] * 17, taxon="foo")
    parser._validate_ontology_class_id("GO:0000785", placeholder_line)

    assert len(parser.report.messages) == 0
Exemplo n.º 6
0
 def retrieve_associations(self, ont, group):
     """Load ontology ``ont`` and populate ``self.associations`` for ``group``.

     For ``ont == 'go'`` the GAF selected for ``group`` is parsed, the raw
     parse result is kept on ``self.assocs``, and only non-header records
     whose object id is among the terms returned by ``descendants`` for
     GO:0008150 or GO:0003674 are used to build the association set.
     For any other ontology the associations are created through
     ``self.afactory.create`` as gene/phenotype associations for the
     taxon mapped from ``group``.

     Args:
         ont: ontology handle passed to ``OntologyFactory.create``.
         group: ``'human'`` or ``'mouse'``.

     Raises:
         KeyError: in the non-GO branch when ``group`` has no taxon mapping.
     """
     taxon_map = {
         'human': 'NCBITaxon:9606',
         'mouse': 'NCBITaxon:10090',
     }
     gaf_urls = {
         'mouse': "http://current.geneontology.org/annotations/mgi.gaf.gz",
         'human': "http://current.geneontology.org/annotations/goa_human.gaf.gz",
     }
     ofactory = OntologyFactory()
     self.ontology = ofactory.create(ont)
     p = GafParser()
     if ont == 'go':
         go_roots = set(
             self.ontology.descendants('GO:0008150') +
             self.ontology.descendants('GO:0003674'))
         sub_ont = self.ontology.subontology(go_roots)
         # Parse the GAF chosen for this group.  Previously a hard-coded
         # local 'goa_human.gaf.gz' was parsed regardless of the group, so
         # a 'mouse' request silently received human annotations.
         url = gaf_urls.get(group, '')
         assocs = p.parse(url)
         self.assocs = assocs
         assocs = [
             x for x in assocs
             if 'header' not in x and x['object']['id'] in go_roots
         ]
         self.associations = self.afactory.create_from_assocs(
             assocs, ontology=sub_ont)
     else:
         self.associations = self.afactory.create(
             ontology=self.ontology,
             subject_category='gene',
             object_category='phenotype',
             taxon=taxon_map[group])
Exemplo n.º 7
0
def produce_gaf(dataset, source_gaf, ontology_graph, gpipath=None, paint=False, group="unknown"):
    """Validate ``source_gaf`` and write the results next to it.

    Four files are written into the directory of ``source_gaf``:
    ``<dataset>_valid.gaf`` (associations yielded by the parser),
    ``<dataset>_noiea.gaf`` (handed to the parser config as
    ``filtered_evidence_file``), ``<dataset>.report.md`` and
    ``<dataset>.report.json``.

    Args:
        dataset: name used to build the output file names.
        source_gaf: path of the GAF file to validate.
        ontology_graph: ontology passed to the parser configuration.
        gpipath: forwarded as ``gpi_authority_path``.
        paint: forwarded to the parser configuration.
        group: forwarded to ``GafParser``.

    Returns:
        list: ``[validated_gaf_path, filtered_gaf_path]``.
    """
    source_dir = os.path.split(source_gaf)[0]
    filtered_path = os.path.join(source_dir, "{}_noiea.gaf".format(dataset))
    validated_gaf_path = os.path.join(source_dir, "{}_valid.gaf".format(dataset))

    # Both outputs are managed by ``with`` so they are closed even when
    # validation raises part-way through.
    with open(filtered_path, "w") as filtered_associations, \
            open(validated_gaf_path, "w") as outfile:
        config = assocparser.AssocParserConfig(
            ontology=ontology_graph,
            filter_out_evidence=["IEA"],
            filtered_evidence_file=filtered_associations,
            gpi_authority_path=gpipath,
            paint=paint
        )
        gafwriter = GafWriter(file=outfile)

        click.echo("Validating source GAF: {}".format(source_gaf))
        parser = GafParser(config=config, group=group, dataset=dataset)

        # Line count is only used as the progress bar length.
        with open(source_gaf) as sg:
            lines = sum(1 for _ in sg)

        with open(source_gaf) as gaf:
            with click.progressbar(iterable=parser.association_generator(file=gaf), length=lines) as associations:
                for assoc in associations:
                    gafwriter.write_assoc(assoc)

    with open(os.path.join(source_dir, "{}.report.md".format(dataset)), "w") as report_md:
        report_md.write(parser.report.to_markdown())

    with open(os.path.join(source_dir, "{}.report.json".format(dataset)), "w") as report_json:
        report_json.write(json.dumps(parser.report.to_report_json(), indent=4))

    return [validated_gaf_path, filtered_path]
Exemplo n.º 8
0
def test_errors_gaf():
    """Parse the errors.gaf fixture and check message and association counts.

    Also writes the surviving associations with GafWriter and verifies that
    any remaining object extension is a single unit with relation
    BFO:0000050 and term X:1.
    """
    config = assocparser.AssocParserConfig(ecomap=EcoMap())
    p = GafParser(config=config)
    # Close the fixture deterministically, even when parsing raises.
    with open("tests/resources/errors.gaf", "r") as gaf:
        assocs = p.parse(gaf, skipheader=True)
    msgs = p.report.messages
    print(json.dumps(p.report.to_report_json(), indent=4))

    n_invalid_idspace = 0
    for m in msgs:
        print("MESSAGE: {}".format(m))
        if m['type'] == assocparser.Report.INVALID_IDSPACE:
            n_invalid_idspace += 1
    assert len(msgs) == 13
    assert n_invalid_idspace == 1
    assert len(assocs) == 2

    w = GafWriter()
    w.write(assocs)
    for a in assocs:
        if a.object_extensions != []:
            # our test file has no ORs, so in DNF this is always the first
            xs = a.object_extensions[0].elements
            print(xs)
            for x in xs:
                print('X: {}'.format(x))
                # ensure that invalid expressions have been eliminated
                assert x.relation == association.Curie("BFO", "0000050")
                assert x.term == association.Curie.from_str('X:1')
            assert len(xs) == 1
Exemplo n.º 9
0
def produce_gpi(dataset, target_dir, gaf_path, ontology_graph):
    """Write ``<dataset>.gpi`` next to ``gaf_path`` and return its path.

    Every association yielded by the GAF parser is converted to an entity
    with ``GafGpiBridge``; each distinct non-None entity is written once.
    ``target_dir`` is accepted but not used by this function.
    """
    parser = GafParser()
    parser.config = assocparser.AssocParserConfig(ontology=ontology_graph)

    # Count lines up front; the total is only the progress bar length.
    line_total = 0
    with open(gaf_path) as counted:
        for _ in counted:
            line_total += 1

    out_dir = os.path.split(gaf_path)[0]
    gpi_path = os.path.join(out_dir, "{}.gpi".format(dataset))

    with open(gaf_path) as gaf_file:
        with open(gpi_path, "w") as gpi_file:
            click.echo("Using {} as the gaf to build gpi with".format(gaf_path))
            bridge = gafgpibridge.GafGpiBridge()
            writer = entitywriter.GpiWriter(file=gpi_file)
            seen = set()

            progress = click.progressbar(
                iterable=parser.association_generator(file=gaf_file),
                length=line_total)
            with progress as associations:
                for assoc in associations:
                    entity = bridge.convert_association(assoc)
                    # Skip entities that are already written or missing.
                    if entity in seen or entity is None:
                        continue
                    seen.add(entity)
                    writer.write_entity(entity)

    return gpi_path
 def load_associations(self, taxon) -> None:
     """Load ``self.ont`` and populate ``self.associations`` for ``taxon``.

     For the ``'go'`` ontology the GAF for the taxon is parsed, the raw
     result is stored on ``self.assocs`` and the non-header records whose
     object id is in the selected GO term set are turned into an
     association set over the matching subontology.  For every other
     ontology gene/phenotype associations are created for the taxon.
     """
     self.ontology = OntologyFactory().create(self.ont)
     gaf_parser = GafParser()

     if self.ont != 'go':
         taxon_ids = {
             'human': 'NCBITaxon:9606',
             'mouse': 'NCBITaxon:10090',
         }
         self.associations = self.afactory.create(
             ontology=self.ontology,
             subject_category='gene',
             object_category='phenotype',
             taxon=taxon_ids[taxon])
         return

     # GO:0008150 is biological_process and GO:0003674 is molecular_function,
     # two of the three top-level GO terms; cellular_component is left out.
     process_terms = self.ontology.descendants('GO:0008150')
     function_terms = self.ontology.descendants('GO:0003674')
     go_roots = set(process_terms + function_terms)
     sub_ont = self.ontology.subontology(go_roots)

     gaf_urls = {
         'mouse': "http://current.geneontology.org/annotations/mgi.gaf.gz",
         'human': "http://current.geneontology.org/annotations/goa_human.gaf.gz",
     }
     # Any other taxon falls back to the empty URL.
     url = gaf_urls.get(taxon, '')

     parsed = gaf_parser.parse(url)
     self.assocs = parsed

     kept = []
     for record in parsed:
         if 'header' in record.keys():
             continue
         if record['object']['id'] in go_roots:
             kept.append(record)
     self.associations = self.afactory.create_from_assocs(kept, ontology=sub_ont)
Exemplo n.º 11
0
def test_errors_gaf():
    """Parse the errors.gaf fixture and check message and association counts.

    Also writes the parsed associations with GafWriter and verifies that any
    object extension present is the single ``foo(X:1)`` expression.
    """
    p = GafParser()
    p.config.ecomap = EcoMap()
    # Close the fixture deterministically, even when parsing raises.
    with open("tests/resources/errors.gaf", "r") as gaf:
        assocs = p.parse(gaf)
    msgs = p.report.messages
    print("MESSAGES: {}".format(len(msgs)))
    for m in msgs:
        print("MESSAGE: {}".format(m))
    assert len(msgs) == 15

    # NOTE(review): an earlier comment here said "we expect 4", which
    # contradicts the asserted count of 7 - confirm which one is intended.
    assert len(assocs) == 7
    from ontobio.io import GafWriter
    w = GafWriter()
    w.write(assocs)
    for a in assocs:
        if 'object_extensions' in a:
            # our test file has no ORs, so in DNF this is always the first
            xs = a['object_extensions']['union_of'][0]['intersection_of']
            for x in xs:
                print('X: {}'.format(x))
                # ensure that invalid expressions have been eliminated
                assert x['property'] == 'foo'
                assert x['filler'] == 'X:1'
            assert len(xs) == 1
Exemplo n.º 12
0
def test_bad_date():
    """A GAF line with ``TODAY`` in place of a date must be skipped."""
    bad_date_line = "PomBase\tSPAC25B8.17\typf1\t\tGO:0000007\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896\tTODAY\tPomBase\tfoo(X:1)"

    result = GafParser().parse_line(bad_date_line)

    assert result.skipped == True
    assert result.associations == []
Exemplo n.º 13
0
def produce_ttl(dataset, target_dir, gaf_path, ontology_graph):
    """Translate the GAF at ``gaf_path`` to ``<dataset>_cam.ttl`` beside it.

    Every non-header association yielded by the parser is passed through
    ``CamRdfTransform``; the accumulated graph is serialized once at the end.
    ``target_dir`` is accepted but not used by this function.

    Returns:
        str: path of the written ttl file.
    """
    gafparser = GafParser()
    gafparser.config = assocparser.AssocParserConfig(ontology=ontology_graph)

    # Line count is only used as the progress bar length.
    with open(gaf_path) as sg:
        lines = sum(1 for _ in sg)

    ttl_path = os.path.join(
        os.path.split(gaf_path)[0], "{}_cam.ttl".format(dataset))
    click.echo("Producing ttl: {}".format(ttl_path))
    rdf_writer = assoc_rdfgen.TurtleRdfWriter()
    transformer = assoc_rdfgen.CamRdfTransform(writer=rdf_writer)

    with open(gaf_path) as gf:
        with click.progressbar(
                iterable=gafparser.association_generator(file=gf),
                length=lines) as associations:
            for assoc in associations:
                # Header records are not translated.
                if "header" not in assoc or not assoc["header"]:
                    transformer.provenance()
                    transformer.translate(assoc)

    with open(ttl_path, "wb") as ttl:
        click.echo("Writing ttl to disk")
        rdf_writer.serialize(destination=ttl)

    return ttl_path
Exemplo n.º 14
0
def test_subject_extensions():
    """The trailing UniProtKB:P12345 column must surface as an isoform subject extension."""
    gaf_line = "PomBase\tSPAC25B8.17\typf1\t\tGO:0000007\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896\t20181024\tPomBase\tfoo(X:1)\tUniProtKB:P12345"
    result = GafParser().parse_line(gaf_line)

    print(json.dumps(result.associations[0], indent=4))
    assert "subject_extensions" in result.associations[0]

    isoform_fillers = [
        ext["filler"]
        for ext in result.associations[0]['subject_extensions']
        if ext["property"] == "isoform"
    ]
    assert isoform_fillers[0] == "UniProtKB:P12345"
Exemplo n.º 15
0
def test_one_line():
    """Smoke test: parsing a single 15-column GAF line must not raise."""
    p = GafParser(config=assocparser.AssocParserConfig(
        ontology=OntologyFactory().create(
            "tests/resources/goslim_generic.json")))

    # Column separators are written as \t escapes rather than literal tab
    # characters, so editors or formatters that expand tabs cannot silently
    # corrupt the line.
    p.parse_line(
        "PomBase\tSPBC16D10.09\tpcn1\t\tGO:0009536\tPMID:8663159\tIDA\t\tC\tPCNA\tpcn\tprotein\ttaxon:4896\t20150326\tPomBase"
    )
Exemplo n.º 16
0
def test_gaf_2_1_upconvert_in_parse():
    """A GAF 2.1 line with a blank qualifier gets relation BFO:0000050 when parsed."""
    gaf_text = "!gaf-version: 2.1\nSGD\tS000000819\tAFG3\t\tGO:0005840\tPMID:8681382|SGD_REF:S000055187\tIMP\t\tP\tMitochondrial inner membrane m-AAA protease component\tYER017C|AAA family ATPase AFG3|YTA10\tgene\ttaxon:559292\t20170428\tSGD"
    gaf_stream = io.StringIO(gaf_text)
    slim = OntologyFactory().create("tests/resources/goslim_generic.json")
    parser = GafParser(config=assocparser.AssocParserConfig(ontology=slim))

    # We're 2.1, qualifier blank, cell component term from above, ontology defined: should upgrade
    parsed = parser.parse(gaf_stream, skipheader=True)

    expected_relation = association.Curie(namespace="BFO", identity="0000050")
    assert parsed[0].relation == expected_relation
Exemplo n.º 17
0
def test_subject_extensions():
    """The trailing UniProtKB:P12345 column must become the only subject extension."""
    gaf_line = "PomBase\tSPAC25B8.17\typf1\t\tGO:0000007\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896\t20181024\tPomBase\tpart_of(X:1)\tUniProtKB:P12345"
    result = GafParser().parse_line(gaf_line)

    assert len(result.associations[0].subject_extensions) == 1

    extensions = result.associations[0].subject_extensions
    form_term = extensions[0].term
    assert form_term == association.Curie.from_str("UniProtKB:P12345")
Exemplo n.º 18
0
def test_errors_gaf():
    """Parsing the errors.gaf fixture must produce exactly 8 report messages."""
    p = GafParser()
    p.config.ecomap = EcoMap()
    # Only the report is inspected, so the parse result is not kept; the
    # context manager closes the fixture even when parsing raises.
    with open("tests/resources/errors.gaf", "r") as gaf:
        p.parse(gaf)
    msgs = p.report.messages
    print("MESSAGES: {}".format(len(msgs)))
    for m in msgs:
        print("MESSAGE: {}".format(m))
    assert len(msgs) == 8
Exemplo n.º 19
0
def test_bad_withfrom():
    """A with/from value of ``SGD:`` must yield no association and be reported."""
    parser = GafParser()
    # With/from has no identity portion after the namespace
    gaf_line = "PomBase\tSPAC25B8.17\typf1\t\tGO:0000007\tGO_REF:0000024\tISO\tSGD:\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896\t20181024\tPomBase"
    result = parser.parse_line(gaf_line)

    assert result.associations == []

    rule_messages = parser.report.to_report_json()["messages"]["gorule-0000001"]
    assert rule_messages[0]["obj"] == "SGD:"
Exemplo n.º 20
0
    def load_associations_from_file(self, associations_type: DataType,
                                    associations_url: str,
                                    associations_cache_path: str,
                                    config: GenedescConfigParser) -> None:
        """Load GO, DO or expression associations from a cached file.

        The file is parsed with ``GafParser``, turned into an association
        set against the ontology of the same data type, stored on the
        matching ``*_associations`` attribute and then replaced by the
        result of ``remove_blacklisted_annotations``.  Any other data type
        is ignored.

        Args:
            associations_type (DataType): the type of associations to set
            associations_url (str): url to the association file
            associations_cache_path (str): path to cache file for the associations
            config (GenedescConfigParser): configuration object where to read properties
        """
        assoc_config = AssocParserConfig(remove_double_prefixes=True,
                                         paint=True)

        # Select the log message, attribute prefix and config module.
        if associations_type == DataType.GO:
            message = "Loading GO associations from file"
            prefix = "go"
            module = Module.GO
        elif associations_type == DataType.DO:
            message = "Loading DO associations from file"
            prefix = "do"
            module = Module.DO_EXP_AND_BIO
        elif associations_type == DataType.EXPR:
            message = "Loading Expression associations from file"
            prefix = "expression"
            module = Module.EXPRESSION
        else:
            return

        assoc_attr = prefix + "_associations"
        ontology_attr = prefix + "_ontology"

        logger.info(message)
        cached_file = self._get_cached_file(
            cache_path=associations_cache_path,
            file_source_url=associations_url)
        parsed = GafParser(config=assoc_config).parse(file=cached_file,
                                                      skipheader=True)
        # Store the unfiltered set first, then overwrite it with the
        # blacklist-filtered one, as two separate assignments.
        setattr(self, assoc_attr,
                AssociationSetFactory().create_from_assocs(
                    assocs=parsed, ontology=getattr(self, ontology_attr)))
        blacklist = config.get_module_property(
            module=module, prop=ConfigModuleProperty.EXCLUDE_TERMS)
        setattr(self, assoc_attr,
                self.remove_blacklisted_annotations(
                    association_set=getattr(self, assoc_attr),
                    ontology=getattr(self, ontology_attr),
                    terms_blacklist=blacklist))
Exemplo n.º 21
0
def test_skim_gaf():
    """Skimming the POMBASE fixture yields 370 (subject, name, object) tuples."""
    p = GafParser()
    p.config.ecomap = EcoMap()
    # Close the fixture deterministically instead of leaving the handle to
    # the garbage collector.
    with open(POMBASE, "r") as gaf:
        results = p.skim(gaf)
    assert len(results) == 370
    for r in results:
        print(str(r))
        (s, sn, o) = r
        assert o.startswith('GO:')
        assert s.startswith('PomBase:')
Exemplo n.º 22
0
def test_alt_id_repair():
    """With ALT_ID_ONT configured, the parsed object id must be GO:0043623."""
    parser = GafParser()
    alt_id_ontology = OntologyFactory().create(ALT_ID_ONT)
    parser.config.ecomap = EcoMap()
    parser.config.ontology = alt_id_ontology

    gaf_text = "SGD\tS000000819\tAFG3\t\tGO:0043623\tPMID:8681382|SGD_REF:S000055187\tIMP\t\tP\tMitochondrial inner membrane m-AAA protease component\tYER017C|AAA family ATPase AFG3|YTA10\tgene\ttaxon:559292\t20170428\tSGD"
    parsed = parser.parse(io.StringIO(gaf_text), skipheader=True)

    assert len(parsed) > 0
    first = parsed[0]
    assert first["object"]["id"] == "GO:0043623"
Exemplo n.º 23
0
 def create_from_remote_file(self, group, snapshot=True, **args):
     """
     Creates from remote GAF

     Downloads ``<group>.gaf.gz`` from snapshot.geneontology.org, skims it
     with ``GafParser`` and passes the resulting tuples, together with
     ``**args``, to ``create_from_tuples``.

     ``snapshot`` is accepted but not read by this method.
     """
     import requests
     url = "http://snapshot.geneontology.org/annotations/{}.gaf.gz".format(group)
     r = requests.get(url, stream=True)
     try:
         p = GafParser()
         results = p.skim(r.raw)
     finally:
         # A streamed response keeps its connection checked out until it
         # is closed, so release it even when skimming fails.
         r.close()
     return self.create_from_tuples(results, **args)
Exemplo n.º 24
0
 def create_from_remote_file(self, group, snapshot=True, **args):
     """
     Creates from remote GAF

     Downloads ``<group>.gaf.gz`` from snapshot.geneontology.org (sending
     a User-Agent built by ``get_user_agent``), skims it with ``GafParser``
     and passes the resulting tuples, together with ``**args``, to
     ``create_from_tuples``.

     ``snapshot`` is accepted but not read by this method.
     """
     import requests
     url = "http://snapshot.geneontology.org/annotations/{}.gaf.gz".format(group)
     r = requests.get(url, stream=True, headers={'User-Agent': get_user_agent(modules=[requests], caller_name=__name__)})
     try:
         p = GafParser()
         results = p.skim(r.raw)
     finally:
         # A streamed response keeps its connection checked out until it
         # is closed, so release it even when skimming fails.
         r.close()
     return self.create_from_tuples(results, **args)
Exemplo n.º 25
0
def make_products(dataset, target_dir, gaf_path, products, ontology_graph):
    """Build the requested gpad and/or ttl products from ``gaf_path``.

    Both ``<dataset>.gpad`` and ``<dataset>_cam.ttl`` are opened (and so
    created) next to ``gaf_path`` regardless of what was requested; only
    the requested ones receive content.  ``target_dir`` is accepted but
    not used by this function.

    Args:
        dataset: name used to build the output file names.
        target_dir: unused.
        gaf_path: path of the GAF file to read.
        products: mapping with boolean-like ``"gpad"`` and ``"ttl"`` entries.
        ontology_graph: ontology passed to the parser configuration.

    Returns:
        list: paths of the requested products, sorted by product key;
        empty when neither product was requested.
    """
    gafparser = GafParser()
    gafparser.config = assocparser.AssocParserConfig(
        ontology=ontology_graph,
        paint=True
    )

    # Line count is only used as the progress bar length.
    with open(gaf_path) as sg:
        lines = sum(1 for _ in sg)

    out_dir = os.path.split(gaf_path)[0]
    product_files = {
        "gpad": open(os.path.join(out_dir, "{}.gpad".format(dataset)), "w"),
        "ttl": open(os.path.join(out_dir, "{}_cam.ttl".format(dataset)), "wb")
    }

    # try/finally closes the product files on every exit path, including
    # the early bail-out and any exception raised while building products.
    try:
        if not products["gpad"] and not products["ttl"]:
            # Bail if we have no products
            return []

        with open(gaf_path) as gf:
            click.echo("Using {} as the gaf to build data products with".format(gaf_path))
            if products["ttl"]:
                click.echo("Setting up {}".format(product_files["ttl"].name))
                rdf_writer = assoc_rdfgen.TurtleRdfWriter(label=os.path.split(product_files["ttl"].name)[1] )
                transformer = assoc_rdfgen.CamRdfTransform(writer=rdf_writer)

            if products["gpad"]:
                click.echo("Setting up {}".format(product_files["gpad"].name))
                gpadwriter = GpadWriter(file=product_files["gpad"])

            click.echo("Making products...")
            with click.progressbar(iterable=gafparser.association_generator(file=gf), length=lines) as associations:
                for assoc in associations:
                    if products["ttl"]:
                        # Header records are not translated.
                        if "header" not in assoc or not assoc["header"]:
                            transformer.provenance()
                            transformer.translate(assoc)

                    if products["gpad"]:
                        gpadwriter.write_assoc(assoc)

            # post ttl steps
            if products["ttl"]:
                click.echo("Writing ttl to disk")
                rdf_writer.serialize(destination=product_files["ttl"])
    finally:
        for f in product_files.values():
            f.close()

    return [product_files[prod].name for prod in sorted(product_files.keys()) if products[prod]]
Exemplo n.º 26
0
def test_qualifiers_gaf_2_2():
    """Check gorule-0000001 messages and RO:0004035 qualifiers for the 2.2 fixture."""
    p = GafParser()

    # Close the fixture deterministically, even when parsing raises.
    with open("tests/resources/test-qualifiers-2.2.gaf") as gaf:
        assocs = p.parse(gaf, skipheader=True)

    # Build the report once and reuse it for both checks.
    rule_messages = p.report.to_report_json()["messages"]["gorule-0000001"]

    # NOT by itself is not allowed
    assert len([m for m in rule_messages if m["obj"] == "NOT"]) == 1
    assert len([m for m in rule_messages if m["obj"] == "contributes_to|enables"]) == 1

    expected_qualifier = association.Curie.from_str("RO:0004035")
    assert len([a for a in assocs if expected_qualifier in a.qualifiers]) == 1
Exemplo n.º 27
0
def test_upgrade_qualifiers_for_cell_component():
    """Upgrading the empty qualifier of this GO:0008372 line must give RO:0002432."""
    columns = [
        "SGD", "S000000819", "AFG3", "", "GO:0008372",
        "PMID:8681382|SGD_REF:S000055187", "IMP", "", "P",
        "Mitochondrial inner membrane m-AAA protease component",
        "YER017C|AAA family ATPase AFG3|YTA10", "gene",
        "taxon:559292", "20170428", "SGD",
    ]
    slim = OntologyFactory().create("tests/resources/goslim_generic.json")
    parser = GafParser(config=assocparser.AssocParserConfig(ontology=slim))
    parser.make_internal_cell_component_closure()

    conversion = gafparser.to_association(columns)
    upgraded = parser.upgrade_empty_qualifier(conversion.associations[0])

    expected_qualifier = association.Curie(namespace="RO", identity="0002432")
    assert upgraded.qualifiers[0] == expected_qualifier
Exemplo n.º 28
0
def test_object_extensions():
    """``part_of(X:1)`` must parse to one conjunctive set holding BFO:0000050 / X:1."""
    parser = GafParser()
    gaf_line = "PomBase\tSPAC25B8.17\typf1\t\tGO:0000007\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896\t20181024\tPomBase\tpart_of(X:1)\tUniProtKB:P12345"
    result = parser.parse_line(gaf_line)
    print(parser.report.to_markdown())

    assert len(result.associations[0].object_extensions) > 0

    expected_unit = association.ExtensionUnit(
        association.Curie("BFO", "0000050"), association.Curie("X", "1"))
    expected_extensions = [association.ConjunctiveSet([expected_unit])]
    assert result.associations[0].object_extensions == expected_extensions
Exemplo n.º 29
0
 def load_associations(self, group):
     """Parse the remote GAF for ``group`` into ``self.associations``.

     ``'human'`` uses the goa_human file; every other group uses
     ``gene_association.<group>.gz``.  Header records are dropped before
     the association set is built against ``self.ontology``.
     """
     gaf_parser = GafParser()
     set_factory = AssociationSetFactory()

     if group == 'human':
         url = "http://geneontology.org/gene-associations/goa_human.gaf.gz"
     else:
         url = "http://geneontology.org/gene-associations/gene_association.{}.gz".format(
             group)

     records = []
     for record in gaf_parser.parse(url):
         if 'header' not in record.keys():
             records.append(record)

     self.associations = set_factory.create_from_assocs(
         records, ontology=self.ontology)
Exemplo n.º 30
0
def test_gaf_2_1_creates_cell_component_closure():
    """GO:0005840 must be in both the standalone closure and the parser's closure."""
    slim = OntologyFactory().create("tests/resources/goslim_generic.json")

    standalone_closure = gafparser.protein_complex_sublcass_closure(slim)
    # NOTE(review): the original comment named "GO:1902494" as the example
    # term, but the assertion checks GO:0005840 - confirm which is meant.
    assert "GO:0005840" in standalone_closure

    parser = GafParser(config=assocparser.AssocParserConfig(ontology=slim))
    with open("tests/resources/pombase_single.gaf") as gaf_file:
        # First line will be version declaration, triggering closure computation
        first_line = gaf_file.readline()
        parser.parse_line(first_line)

    assert "GO:0005840" in parser.cell_component_descendants_closure
Exemplo n.º 31
0
    def parse(self, limit=None):
        """
        Override Source.parse()

        Parses the 'rat_gene2mammalian_phenotype' file from ``self.rawdir``
        with ontobio's GafParser and calls ``make_association`` for every
        parsed record that has a 'relation' key.

        Args:
            :param limit (int, optional) limit the number of rows processed
        Returns:
            :return None
        """
        if limit is not None:
            LOG.info("Only parsing first %d rows", limit)

        rgd_file = '/'.join(
            (self.rawdir, self.files['rat_gene2mammalian_phenotype']['file']))
        # ontobio gafparser implemented here
        p = GafParser()
        # Close the input deterministically, even when parsing raises.
        with open(rgd_file, "r") as gaf:
            assocs = p.parse(gaf)

        for i, assoc in enumerate(assocs):
            # Check the limit before handling the row so that exactly
            # ``limit`` rows are looked at; the previous ``i > limit`` test
            # ran after processing and let limit + 2 rows through.
            if limit is not None and i >= limit:
                break
            if 'relation' in assoc:
                self.make_association(assoc)