Exemplo n.º 1
0
def test_errors_gaf():
    """Parse the ``errors.gaf`` fixture and check the parser's report.

    Asserts the total number of report messages, the number of
    ``INVALID_IDSPACE`` messages, the number of associations returned by
    the parser, and the contents of any non-empty object extensions.
    """
    config = assocparser.AssocParserConfig(ecomap=EcoMap())
    p = GafParser(config=config)
    # Context manager guarantees the fixture's file handle is closed, even
    # if parsing raises.
    with open("tests/resources/errors.gaf", "r") as gaf_file:
        assocs = p.parse(gaf_file, skipheader=True)
    msgs = p.report.messages
    print(json.dumps(p.report.to_report_json(), indent=4))
    for m in msgs:
        print("MESSAGE: {}".format(m))
    n_invalid_idspace = sum(
        1 for m in msgs if m['type'] == assocparser.Report.INVALID_IDSPACE
    )
    assert len(msgs) == 16
    assert n_invalid_idspace == 1
    assert len(assocs) == 5

    # Round-trip the parsed associations through the writer as a smoke test.
    w = GafWriter()
    w.write(assocs)
    for a in assocs:
        if a['object_extensions'] != {}:
            # our test file has no ORs, so in DNF this is always the first
            xs = a['object_extensions']['union_of'][0]['intersection_of']
            for x in xs:
                print('X: {}'.format(x))
                # ensure that invalid expressions have been eliminated
                assert x['property'] == 'foo'
                assert x['filler'] == 'X:1'
            assert len(xs) == 1
Exemplo n.º 2
0
def test_errors_gaf():
    """Parse the ``errors.gaf`` fixture and check the parser's report.

    Asserts the total number of report messages, the number of
    ``INVALID_IDSPACE`` messages, the number of associations returned by
    the parser, and the relation/term of any non-empty object extensions.
    """
    config = assocparser.AssocParserConfig(ecomap=EcoMap())
    p = GafParser(config=config)
    # Context manager guarantees the fixture's file handle is closed, even
    # if parsing raises.
    with open("tests/resources/errors.gaf", "r") as gaf_file:
        assocs = p.parse(gaf_file, skipheader=True)
    msgs = p.report.messages
    print(json.dumps(p.report.to_report_json(), indent=4))
    for m in msgs:
        print("MESSAGE: {}".format(m))
    n_invalid_idspace = sum(
        1 for m in msgs if m['type'] == assocparser.Report.INVALID_IDSPACE
    )
    assert len(msgs) == 13
    assert n_invalid_idspace == 1
    assert len(assocs) == 2

    # Round-trip the parsed associations through the writer as a smoke test.
    w = GafWriter()
    w.write(assocs)
    for a in assocs:
        if a.object_extensions != []:
            # our test file has no ORs, so in DNF this is always the first
            xs = a.object_extensions[0].elements
            print(xs)
            for x in xs:
                print('X: {}'.format(x))
                # ensure that invalid expressions have been eliminated
                assert x.relation == association.Curie("BFO", "0000050")
                assert x.term == association.Curie.from_str('X:1')
            assert len(xs) == 1
Exemplo n.º 3
0
def test_errors_gaf():
    """Parse the ``errors.gaf`` fixture and check message/association counts.

    Also writes the parsed associations with ``GafWriter`` and checks the
    contents of every ``object_extensions`` entry that is present.
    """
    p = GafParser()
    p.config.ecomap = EcoMap()
    # Context manager guarantees the fixture's file handle is closed, even
    # if parsing raises.
    with open("tests/resources/errors.gaf", "r") as gaf_file:
        assocs = p.parse(gaf_file)
    msgs = p.report.messages
    print("MESSAGES: {}".format(len(msgs)))
    for m in msgs:
        print("MESSAGE: {}".format(m))
    assert len(msgs) == 15

    # NOTE(review): an earlier comment here said "we expect 4", which
    # contradicted the assertion; the asserted count is kept as
    # authoritative -- confirm against the fixture.
    assert len(assocs) == 7
    from ontobio.io import GafWriter
    w = GafWriter()
    w.write(assocs)
    for a in assocs:
        if 'object_extensions' in a:
            # our test file has no ORs, so in DNF this is always the first
            xs = a['object_extensions']['union_of'][0]['intersection_of']
            for x in xs:
                print('X: {}'.format(x))
                # ensure that invalid expressions have been eliminated
                assert x['property'] == 'foo'
                assert x['filler'] == 'X:1'
            assert len(xs) == 1
Exemplo n.º 4
0
 def __init__(self, writer=None):
     """Initialise instance state.

     :param writer: object stored as ``self.writer``; when ``None`` a new
         ``TurtleRdfWriter`` is created and used instead.
     """
     self.writer = TurtleRdfWriter() if writer is None else writer
     self.ecomap = EcoMap()
     self.uribase = 'http://example.org/'
     # Plain flags, both initially off.
     self.include_subject_info = False
     self._emit_header_done = False
Exemplo n.º 5
0
def test_errors_gaf():
    """Parse the ``errors.gaf`` fixture and check the number of report messages."""
    p = GafParser()
    p.config.ecomap = EcoMap()
    # Context manager guarantees the fixture's file handle is closed, even
    # if parsing raises. The returned associations are not inspected here.
    with open("tests/resources/errors.gaf", "r") as gaf_file:
        p.parse(gaf_file)
    msgs = p.report.messages
    print("MESSAGES: {}".format(len(msgs)))
    for m in msgs:
        print("MESSAGE: {}".format(m))
    assert len(msgs) == 8
Exemplo n.º 6
0
 def __init__(self, writer=None):
     """Initialise instance state.

     :param writer: object stored as ``self.writer``; when ``None`` a new
         ``TurtleRdfWriter`` is created and used instead. Its ``base``
         attribute becomes ``self.uribase``.
     """
     active_writer = TurtleRdfWriter() if writer is None else writer
     self.writer = active_writer
     self.include_subject_info = False
     eco = EcoMap()
     self.ecomap = eco
     self._emit_header_done = False
     self.uribase = active_writer.base
     self.ro = None
     # Return value is intentionally discarded, as in the original call.
     eco.mappings()
Exemplo n.º 7
0
def test_alt_id_repair():
    """Parse one GAF row with an ontology configured and check its GO id.

    The row's GO term column is ``GO:0043623``; the first parsed
    association must report that same object id.
    """
    parser = GafParser()
    ontology = OntologyFactory().create(ALT_ID_ONT)
    parser.config.ecomap = EcoMap()
    parser.config.ontology = ontology
    gaf_stream = io.StringIO("SGD\tS000000819\tAFG3\t\tGO:0043623\tPMID:8681382|SGD_REF:S000055187\tIMP\t\tP\tMitochondrial inner membrane m-AAA protease component\tYER017C|AAA family ATPase AFG3|YTA10\tgene\ttaxon:559292\t20170428\tSGD")

    parsed = parser.parse(gaf_stream, skipheader=True)
    assert len(parsed) > 0
    first_assoc = parsed[0]
    assert first_assoc["object"]["id"] == "GO:0043623"
Exemplo n.º 8
0
def test_skim_gaf():
    """Skim the PomBase GAF fixture and check the shape of every result.

    Each skim result is unpacked as a 3-tuple; the first element must
    start with ``PomBase:`` and the last with ``GO:``.
    """
    p = GafParser()
    p.config.ecomap = EcoMap()
    # Context manager guarantees the fixture's file handle is closed, even
    # if skimming raises.
    with open(POMBASE, "r") as gaf_file:
        results = p.skim(gaf_file)
    assert len(results) == 370
    for r in results:
        print(str(r))
        # The middle element is not checked by this test.
        s, _, o = r
        assert o.startswith('GO:')
        assert s.startswith('PomBase:')
Exemplo n.º 9
0
def test_skim_gpad():
    """Skim the PomBase GPAD fixture and check the shape of every result.

    Each skim result is unpacked as a 3-tuple; the first element must
    start with ``PomBase:`` or ``PR:`` and the last with ``GO:``.
    """
    p = GpadParser()
    p.config.ecomap = EcoMap()
    # Context manager guarantees the fixture's file handle is closed, even
    # if skimming raises.
    with open(POMBASE_GPAD, "r") as gpad_file:
        results = p.skim(gpad_file)
    assert len(results) == 1984
    for r in results:
        print(str(r))
        # The middle element is not checked by this test.
        s, _, o = r
        assert o.startswith('GO:')
        assert s.startswith(('PomBase:', 'PR:'))
Exemplo n.º 10
0
 def __init__(self, writer=None):
     """Initialise instance state.

     :param writer: object stored as ``self.writer``; when ``None`` a new
         ``TurtleRdfWriter`` is created and used instead. Its ``base``
         attribute becomes ``self.uribase``.
     """
     if writer is None:
         writer = TurtleRdfWriter()
     self.writer = writer
     self.include_subject_info = False
     self.ecomap = EcoMap()
     self._emit_header_done = False
     self.uribase = writer.base
     self.ro = None
     self.ecomap.mappings()
     # Matches any single character that is NOT '.', ':', '_', '-', a digit
     # or an ASCII letter. Written as a raw string so the backslashes reach
     # the regex engine without triggering Python's invalid-escape warning;
     # the compiled pattern is unchanged.
     self.bad_chars_regex = re.compile(r"[^\.:_\-0-9a-zA-Z]")
Exemplo n.º 11
0
def test_ecomap():
    """
    test mappings between GAF codes and ECO

    Covers both directions (``coderef_to_ecoclass`` and
    ``ecoclass_to_coderef``), including unknown codes and unknown ECO
    classes.
    """
    m = EcoMap()
    assert m.coderef_to_ecoclass('IEA', 'GO_REF:0000002') == 'ECO:0000256'
    assert m.coderef_to_ecoclass('IEA') == 'ECO:0000501'
    assert m.coderef_to_ecoclass('IEA', 'FAKE:ID') == 'ECO:0000501'
    assert m.ecoclass_to_coderef('ECO:0000501') == ('IEA', None)
    assert m.ecoclass_to_coderef('ECO:0000256') == ('IEA', 'GO_REF:0000002')
    # Unknown evidence codes map to None; compare by identity, not equality.
    assert m.coderef_to_ecoclass('BADCODE', None) is None
    assert m.coderef_to_ecoclass('BADCODE', 'GO_REF:xxx') is None
    assert m.ecoclass_to_coderef('ECO:9999999999999999999') == (None, None)
    assert m.coderef_to_ecoclass('ISO', None) == 'ECO:0000266'
Exemplo n.º 12
0
def parse_with(f, p):
    """Parse fixture file *f* with parser *p* and check the associations.

    :param f: path of the association file to parse; compared against
        ``POMBASE`` to decide whether GAF-only checks apply.
    :param p: parser instance; its ``config`` is modified in place.
    """
    p.config.ecomap = EcoMap()
    is_gaf = f == POMBASE
    ont = OntologyFactory().create(ONT)

    # only do ontology checking on GAF parse;
    # this is because ontology is made from GAF
    p.config.ontology = ont if is_gaf else None

    # Context manager guarantees the fixture's file handle is closed, even
    # if parsing raises.
    with open(f, "r") as assoc_file:
        results = p.parse(assoc_file, skipheader=True)
    print(p.report.to_markdown())
    r1 = results[0]
    # TODO: test datafile does not have ECOs yet!!
    assert ecomap.ecoclass_to_coderef(str(
        r1.evidence.type))[0] == 'ISO' or str(
            r1.evidence.type) == 'ECO:0000201'
    assert r1.evidence.with_support_from == [
        association.ConjunctiveSet(
            [association.Curie.from_str('SGD:S000001583')])
    ]
    assert r1.evidence.has_supporting_reference == [
        association.Curie.from_str('GO_REF:0000024')
    ]

    if is_gaf:
        assert r1.subject.label == 'ypf1'
        assert association.ymd_str(r1.date, "") == '20150305'

    for r in results:
        sid = r.subject.id
        prov = r.provided_by
        assert prov == 'PomBase' or prov == 'UniProt'
        assert r.object.id.namespace == "GO"
        assert sid.namespace == 'PomBase' or (not is_gaf
                                              and sid.namespace == 'PR')
        if is_gaf:
            assert str(r.subject.taxon) == 'NCBITaxon:4896'

    print("MESSAGES (sample): {}".format(p.report.messages[0:5]))
    # Only messages tagged with rule 59 (qualifier upgrades, per the
    # original note) or rule 27 are tolerated; any other message fails.
    assert len([
        msg for msg in p.report.messages
        if msg["rule"] not in (59, 27)
    ]) == 0
Exemplo n.º 13
0
def test_alt_id_repair():
    """Check that obsolete or merged GO ids are repaired on parse.

    Parses two single-row GAF inputs that differ only in the GO term
    column and asserts the object id reported on the first association.
    """
    p = GafParser()
    ont = OntologyFactory().create(ALT_ID_ONT)
    p.config.ecomap = EcoMap()
    p.config.ontology = ont

    # One GAF row; only the GO id column varies between the cases.
    gaf_row = "SGD\tS000000819\tAFG3\t\t{go_id}\tPMID:8681382|SGD_REF:S000055187\tIMP\t\tP\tMitochondrial inner membrane m-AAA protease component\tYER017C|AAA family ATPase AFG3|YTA10\tgene\ttaxon:559292\t20170428\tSGD"

    cases = [
        # GO:1 is obsolete and has been replaced, so we should see the
        # replacement class ID.
        ("GO:1", "GO:2"),
        # GO:4 is obsolete due to it being merged into GO:3
        ("GO:4", "GO:3"),
    ]
    for given_id, repaired_id in cases:
        gaf = io.StringIO(gaf_row.format(go_id=given_id))
        assocs = p.parse(gaf, skipheader=True)
        assert assocs[0]["object"]["id"] == repaired_id
Exemplo n.º 14
0
def test_skim_gaf_qualifiers():
    """Skim the qualifiers GAF fixture with and without excluded relations.

    The first pass uses only ``remove_double_prefixes``; the second pass
    additionally sets ``exclude_relations`` and expects fewer results.
    """
    p = GafParser()
    p.config.ecomap = EcoMap()
    p.config.remove_double_prefixes = True
    # Context managers guarantee each file handle is closed, even if
    # skimming raises.
    with open(QGAF, "r") as gaf_file:
        results = p.skim(gaf_file)
    for r in results:
        print(str(r))
        # The middle element is not checked by this test.
        s, _, o = r
        assert o.startswith('GO:')
        assert s.startswith(('MGI:', 'PomBase'))
    assert len(results) == 5  # ensure NOTs are skipped

    p.config.exclude_relations = ['contributes_to', 'colocalizes_with']
    with open(QGAF, "r") as gaf_file:
        results = p.skim(gaf_file)
    for r in results:
        s, _, o = r
        assert o.startswith('GO:')
        assert s.startswith(('MGI:', 'PomBase'))
    assert len(results) == 3  # ensure NOTs and excludes relations skipped
Exemplo n.º 15
0
def parse_with(f, p):
    """Parse fixture file *f* with parser *p* and check the associations.

    :param f: path of the association file to parse; compared against
        ``POMBASE`` to decide whether GAF-only checks apply.
    :param p: parser instance; its ``config`` is modified in place.
    """
    p.config.ecomap = EcoMap()
    is_gaf = f == POMBASE
    ont = OntologyFactory().create(ONT)

    # only do ontology checking on GAF parse;
    # this is because ontology is made from GAF
    p.config.ontology = ont if is_gaf else None

    # Context manager guarantees the fixture's file handle is closed, even
    # if parsing raises.
    with open(f, "r") as assoc_file:
        results = p.parse(assoc_file, skipheader=True)
    print(p.report.to_markdown())
    r1 = results[0]
    # TODO: test datafile does not have ECOs yet!!
    assert r1['evidence']['type'] == 'ISO' or r1['evidence'][
        'type'] == 'ECO:0000201'
    assert r1['evidence']['with_support_from'] == ['SGD:S000001583']
    assert r1['evidence']['has_supporting_reference'] == ['GO_REF:0000024']

    if is_gaf:
        assert r1['subject']['label'] == 'ypf1'
        assert r1['date'] == '20150305'

    for r in results:
        sid = r['subject']['id']
        prov = r['provided_by']
        assert prov == 'PomBase' or prov == 'UniProt'
        assert r['object']['id'].startswith('GO:')
        assert sid.startswith('PomBase:') or (not is_gaf
                                              and sid.startswith('PR'))
        if is_gaf:
            assert r['subject']['taxon']['id'] == 'NCBITaxon:4896'

    print("MESSAGES (sample): {}".format(p.report.messages[0:5]))
    # The fixture is expected to parse without any report messages.
    assert len(p.report.messages) == 0
    print(p.report.to_markdown())
Exemplo n.º 16
0
def test_errors_gaf():
    """Parse the ``errors.gaf`` fixture and check message/association counts.

    Also writes the parsed associations with ``GafWriter`` and checks
    every entry found under ``object_extensions`` across all associations.
    """
    p = GafParser()
    p.config.ecomap = EcoMap()
    # Context manager guarantees the fixture's file handle is closed, even
    # if parsing raises.
    with open("tests/resources/errors.gaf", "r") as gaf_file:
        assocs = p.parse(gaf_file)
    msgs = p.report.messages
    print("MESSAGES: {}".format(len(msgs)))
    for m in msgs:
        print("MESSAGE: {}".format(m))
    assert len(msgs) == 15

    # NOTE(review): an earlier comment here said "we expect 4", which
    # contradicted the assertion; the asserted count is kept as
    # authoritative -- confirm against the fixture.
    assert len(assocs) == 7
    from ontobio.io import GafWriter
    w = GafWriter()
    w.write(assocs)
    # Flatten the extensions of all associations; associations without the
    # key contribute nothing.
    xs = [x for a in assocs for x in a.get('object_extensions', [])]
    for x in xs:
        print('X: {}'.format(x))
        # ensure that invalid expressions have been eliminated
        assert x['property'] == 'foo'
        assert x['filler'] == 'X:1'
    assert len(xs) == 3
Exemplo n.º 17
0
def test_convert_gaf_to_gpad():
    """Run ``convert`` on the PomBase GAF fixture with a GAF parser, a GPAD writer and a GPAD parser."""
    gaf_parser = GafParser()
    gaf_parser.config.ecomap = EcoMap()
    # Arguments are built in the same order as before: writer, then the
    # second parser.
    convert(POMBASE, gaf_parser, GpadWriter(), GpadParser())
Exemplo n.º 18
0
from ontobio.io import assocparser
from ontobio.io.gpadparser import GpadParser
from ontobio.io import gafparser
from ontobio.io.gafparser import GafParser
from ontobio.io import GafWriter
from ontobio.io.assocwriter import GpadWriter
from ontobio.assoc_factory import AssociationSetFactory
from ontobio.ontol_factory import OntologyFactory
from ontobio.model import association
from ontobio.rdfgen import relations

from ontobio.ecomap import EcoMap

# Module-level EcoMap instance, available to any test that references `ecomap`.
ecomap = EcoMap()
# NOTE(review): presumably loads the mapping table up front at import time --
# confirm what mappings() does; its return value is discarded here.
ecomap.mappings()

import tempfile
import logging
import pytest
import io
import json

# Fixture file paths. They are relative, so they resolve against the
# current working directory when the tests run.
POMBASE = "tests/resources/truncated-pombase.gaf"
POMBASE_GPAD = "tests/resources/truncated-pombase.gpad"
ONT = "tests/resources/go-truncated-pombase.json"
QGAF = "tests/resources/test-qualifiers.gaf"


def test_skim_gaf():
    p = GafParser()
    p.config.ecomap = EcoMap()
Exemplo n.º 19
0
 def __init__(self, filter_rule : FilterRule):
     """Store the given filter rule and create a fresh ``EcoMap``.

     :param filter_rule: rule object kept as ``self.filter_rule``.
     """
     self.filter_rule = filter_rule
     self.ecomap = EcoMap()