Пример #1
0
def test_roundtrip():
    """
    Parse a GAF line, write the resulting association back out, and require
    the first non-header row written to equal the input line.

    Two inputs are checked: one whose taxon column holds two taxa and one
    whose taxon column holds a single taxon.
    """
    gaf_lines = (
        # Two taxa in the taxon column
        "PomBase\tSPAC25B8.17\typf1\t\tGO:0000006\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:999|taxon:888\t20150305\tPomBase\tfoo(X:1)\tUniProtKB:P12345",
        # Single taxon
        "PomBase\tSPAC25B8.17\typf1\t\tGO:0000006\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:1111\t20150305\tPomBase\tfoo(X:1)\tUniProtKB:P12345",
    )

    for original in gaf_lines:
        gaf_parser = gafparser.GafParser()
        buffer = io.StringIO()
        gaf_writer = assocwriter.GafWriter(file=buffer)

        association = gaf_parser.parse_line(original).associations[0]
        gaf_writer.write_assoc(association)

        # Rows starting with "!" are skipped; only the data row is compared.
        data_rows = [
            row for row in buffer.getvalue().split("\n")
            if not row.startswith("!")
        ]
        assert original == data_rows[0]
Пример #2
0
def test_gaf2_2_qualifier_to_gaf2_1():
    """
    Parse GAF 2.2 lines and write them with a GAF 2.1 writer, then check the
    qualifier column (index 3) of the first non-header row written.

    * `involved_in` must come out as an empty qualifier; a row is still
      written rather than the whole line being dropped.
    * `NOT|involved_in` must come out as just `NOT`.
    """
    cases = (
        # (GAF 2.2 input line, qualifier expected in the GAF 2.1 output)
        (
            "WB\tWBGene00000001\taap-1\tinvolved_in\tGO:0008286\tWB_REF:WBPaper00005614|PMID:12393910\tIMP\t\tP\t\tY110A7A.10\tgene\ttaxon:6239\t20060302\tWB\t\t",
            "",
        ),
        # Test with a `NOT`
        (
            "WB\tWBGene00000001\taap-1\tNOT|involved_in\tGO:0008286\tWB_REF:WBPaper00005614|PMID:12393910\tIMP\t\tP\t\tY110A7A.10\tgene\ttaxon:6239\t20060302\tWB\t\t",
            "NOT",
        ),
    )

    for gaf_2_2_line, expected_qualifier in cases:
        parser = gafparser.GafParser()
        parser.version = "2.2"
        out = io.StringIO()
        writer = assocwriter.GafWriter(file=out,
                                       version="2.1")  # Write out to gaf 2.1

        assoc = parser.parse_line(gaf_2_2_line).associations[0]
        writer.write_assoc(assoc)

        # Skip rows starting with "!" and take the first data row.
        gaf_2_1_line = [
            row for row in out.getvalue().split("\n")
            if not row.startswith("!")
        ][0]
        assert gaf_2_1_line.split("\t")[3] == expected_qualifier
Пример #3
0
def test_gpad_qualifier_removed_in_gaf_2_1():
    """
    Parse GPAD lines, write them as GAF 2.1, and check the qualifier column
    (index 3) of the first non-header row written.
    """

    def written_qualifier(gpad_line):
        # Parse one GPAD line, write its first association as GAF 2.1 and
        # return the qualifier column of the first row not starting with "!".
        gpad_parser = gpadparser.GpadParser()
        buffer = io.StringIO()
        gaf_writer = assocwriter.GafWriter(file=buffer, version="2.1")

        first_assoc = gpad_parser.parse_line(gpad_line).associations[0]
        gaf_writer.write_assoc(first_assoc)

        data_rows = [
            row for row in buffer.getvalue().split("\n")
            if not row.startswith("!")
        ]
        return data_rows[0].split("\t")[3]

    # Qualifier is `part_of` and should be returned blank instead of removing the whole line
    assert written_qualifier(
        "PomBase\tSPBC1348.01\tpart_of\tGO:0009897\tGO_REF:0000051\tECO:0000266\t\t\t20060201\tPomBase\t\t"
    ) == ""

    # Test with a `NOT`
    assert written_qualifier(
        "PomBase\tSPBC1348.01\tNOT|part_of\tGO:0009897\tGO_REF:0000051\tECO:0000266\t\t\t20060201\tPomBase\t\t"
    ) == "NOT"
Пример #4
0
    def parse_gpad_vals_to_gaf_io(gpad_vals):
        """
        Join `gpad_vals` with tabs, parse the result as a single GPAD line and
        write its first association through a `GafWriter`.

        Returns the `io.StringIO` buffer the writer was given.
        """
        gpad_parser = gpadparser.GpadParser()
        buffer = io.StringIO()
        gaf_writer = assocwriter.GafWriter(file=buffer)

        gpad_line = "\t".join(gpad_vals)
        first_assoc = gpad_parser.parse_line(gpad_line).associations[0]
        gaf_writer.write_assoc(first_assoc)
        return buffer
Пример #5
0
def test_gaf_writer():
    """
    Write a dict-shaped association with `GafWriter` and require the first
    non-header row written to equal the expected GAF line.
    """
    subject = {
        "id": "PomBase:SPAC25B8.17",
        "label": "ypf1",
        "type": "protein",
        "fullname": "intramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)",
        "synonyms": ["ppp81"],
        "taxon": {"id": "NCBITaxon:4896"},
    }
    evidence = {
        "type": "ISO",
        "has_supporting_reference": ["GO_REF:0000024"],
        "with_support_from": ["SGD:S000001583"],
    }
    object_extensions = {
        "union_of": [
            {"intersection_of": [{"property": "foo", "filler": "X:1"}]},
        ],
    }
    association = {
        "subject": subject,
        "object": {"id": "GO:0000006", "taxon": "NCBITaxon:4896"},
        "negated": False,
        "qualifiers": [],
        "aspect": "C",
        "relation": {"id": "part_of"},
        "interacting_taxon": "NCBITaxon:555",
        "evidence": evidence,
        "provided_by": "PomBase",
        "date": "20150305",
        "subject_extensions": [
            {"property": "isoform", "filler": "UniProtKB:P12345"},
        ],
        "object_extensions": object_extensions,
    }

    buffer = io.StringIO()
    gaf_writer = assocwriter.GafWriter(file=buffer)

    expected = "PomBase\tSPAC25B8.17\typf1\t\tGO:0000006\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896|taxon:555\t20150305\tPomBase\tfoo(X:1)\tUniProtKB:P12345"
    gaf_writer.write_assoc(association)

    # Rows starting with "!" are skipped; only the data row is compared.
    data_rows = [
        row for row in buffer.getvalue().split("\n")
        if not row.startswith("!")
    ]
    assert expected == data_rows[0]
Пример #6
0
def test_full_gaf_2_2_write():
    """
    Parse a GAF line with the parser version set to 2.2, write it with a
    2.2 writer, and require every tab-separated column of the first
    non-header row written to match the input.
    """
    source_line = "WB\tWBGene00000001\taap-1\tinvolved_in\tGO:0008286\tWB_REF:WBPaper00005614|PMID:12393910\tIMP\t\tP\t\tY110A7A.10\tgene\ttaxon:6239\t20060302\tWB\t\t"

    gaf_parser = gafparser.GafParser()
    gaf_parser.version = "2.2"
    buffer = io.StringIO()
    gaf_writer = assocwriter.GafWriter(file=buffer, version="2.2")

    first_assoc = gaf_parser.parse_line(source_line).associations[0]
    gaf_writer.write_assoc(first_assoc)

    data_rows = [
        row for row in buffer.getvalue().split("\n")
        if not row.startswith("!")
    ]
    assert data_rows[0].split("\t") == source_line.split("\t")
Пример #7
0
def test_gaf_writer():
    """
    Build a `GoAssociation`, write it with `GafWriter`, and require the first
    non-header row written to equal the expected GAF line.
    """
    subject = Subject(
        id=Curie("PomBase", "SPAC25B8.17"),
        label="ypf1",
        type=["protein"],
        fullname=[
            "intramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)"
        ],
        synonyms=["ppp81"],
        taxon=Curie("NCBITaxon", "4896"),
    )
    go_term = Term(id=Curie("GO", "0000006"), taxon=Curie("NCBITaxon", "4896"))
    evidence = Evidence(
        type=Curie("ECO", "0000266"),
        has_supporting_reference=[Curie("GO_REF", "0000024")],
        with_support_from=[
            ConjunctiveSet(elements=[Curie("SGD", "S000001583")]),
        ],
    )
    subject_extensions = [
        ExtensionUnit(relation=Curie("rdfs", "subClassOf"),
                      term=Curie("UniProtKB", "P12345")),
    ]
    object_extensions = [
        ConjunctiveSet(elements=[
            ExtensionUnit(relation=Curie("BFO", "0000050"),
                          term=Curie("X", "1")),
        ]),
    ]
    association = GoAssociation(
        source_line="",
        subject=subject,
        object=go_term,
        negated=False,
        qualifiers=[],
        aspect=Aspect("C"),
        relation=Curie("BFO", "0000050"),
        interacting_taxon=Curie("NCBITaxon", "555"),
        evidence=evidence,
        provided_by=Provider("PomBase"),
        date=Date(year="2015", month="03", day="05", time=""),
        subject_extensions=subject_extensions,
        object_extensions=object_extensions,
        properties={},
    )

    buffer = io.StringIO()
    gaf_writer = assocwriter.GafWriter(file=buffer)
    # `buffer` receives the GAF lines written for the association above
    expected = "PomBase\tSPAC25B8.17\typf1\t\tGO:0000006\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896|taxon:555\t20150305\tPomBase\tpart_of(X:1)\tUniProtKB:P12345"
    gaf_writer.write_assoc(association)
    print(buffer.getvalue())

    # Splitting on "\n" already removes the newlines; skip "!" rows.
    data_rows = [
        row for row in buffer.getvalue().split("\n")
        if not row.startswith("!")
    ]
    assert expected == data_rows[0]
Пример #8
0
def test_single_entry_extension():
    """
    A union holding one intersection with one property/filler pair must be
    rendered by `_extension_expression` as `foo(X:1)`.
    """
    gaf_writer = assocwriter.GafWriter(file=io.StringIO())

    single_unit = {"property": "foo", "filler": "X:1"}
    expression = {"union_of": [{"intersection_of": [single_unit]}]}

    rendered = gaf_writer._extension_expression(expression)
    assert "foo(X:1)" == rendered
Пример #9
0
def validate_input(example: RuleExample, parser: assocparser.AssocParser, config=None) -> Parsed:
    """
    Run a rule example's input through `parser` and collect the results.

    Args:
        example: the example to run; its `input`, `rule_id`, `example_type`,
            `format` and `expected` attributes are read.
        parser: parser used on `example.input`. When `config` is truthy it is
            assigned to `parser.config` first.
        config: optional configuration for `parser`.

    Returns:
        A `Parsed` whose
        * `report` is a new list holding the messages the parser's reporter
          recorded under `example.rule_id`, followed by those recorded under
          "gorule-0000001";
        * `output` is every parsed association rendered through
          `GafWriter.as_tsv` / `tsv_as_string`, joined with newlines;
        * `expected` is `example.expected` parsed and rendered the same way
          when the example is a REPAIR example, otherwise an empty string.
    """
    if config:
        parser.config = config

    writer = assocwriter.GafWriter(file=io.StringIO())

    # The generator is consumed here, so the reporter is populated before the
    # messages are read below.
    assocs_gen = parser.association_generator(file=io.StringIO(example.input), skipheader=True)
    out = [writer.tsv_as_string(writer.as_tsv(assoc)) for assoc in assocs_gen]

    # We only collect the messages from *our* rule we're in (plus gorule-0000001).
    # Copy into a fresh list: extending the list returned by `.get()` would
    # modify the reporter's own stored messages in place.
    messages = parser.report.reporter.messages
    rule_messages = list(messages.get(example.rule_id, []))
    rule_messages.extend(messages.get("gorule-0000001", []))

    # We have to also parse the expected result if we are in a repair to normalize all the data
    expected_out = []
    if example.example_type == ExampleType.REPAIR:
        expected_parsed_gen = create_base_parser(example.format).association_generator(file=io.StringIO(example.expected), skipheader=True)
        expected_writer = assocwriter.GafWriter(file=io.StringIO())
        expected_out = [
            expected_writer.tsv_as_string(expected_writer.as_tsv(assoc))
            for assoc in expected_parsed_gen
        ]

    return Parsed(report=rule_messages, output="\n".join(out), expected="\n".join(expected_out))
Пример #10
0
def test_negated_qualifers():
    """
    Parse a GAF line whose qualifier column is `NOT` and check how the
    association is rendered by `as_tsv`: the GAF writer must give `NOT` in
    column 3, the GPAD writer `NOT|involved_in` in column 2.
    """
    gaf_columns = [
        "PomBase", "SPBC11B10.09", "cdc2", "NOT", "GO:0007275",
        "PMID:21873635", "ISO", "PANTHER:PTN000623979|TAIR:locus:2099478", "P",
        "Cyclin-dependent kinase 1", "UniProtKB:P04551|PTN000624043",
        "protein", "taxon:284812", "20170228", "GO_Central", "", ""
    ]
    parse_result = gafparser.GafParser().parse_line("\t".join(gaf_columns))

    gaf_writer = assocwriter.GafWriter()
    gaf_row = gaf_writer.as_tsv(parse_result.associations[0])
    print(gaf_row)
    assert gaf_row[3] == "NOT"

    gpad_writer = assocwriter.GpadWriter()
    gpad_row = gpad_writer.as_tsv(parse_result.associations[0])
    print(gpad_row)
    assert gpad_row[2] == "NOT|involved_in"
Пример #11
0
def test_intersection_extensions():
    """
    `_extension_expression` must join the units of one intersection with `,`
    and the intersections of the union with `|`.
    """
    gaf_writer = assocwriter.GafWriter(file=io.StringIO())

    first_intersection = {
        "intersection_of": [
            {"property": "foo", "filler": "X:1"},
            {"property": "foo", "filler": "X:2"},
        ],
    }
    second_intersection = {
        "intersection_of": [
            {"property": "bar", "filler": "Y:1"},
        ],
    }
    expression = {"union_of": [first_intersection, second_intersection]}

    rendered = gaf_writer._extension_expression(expression)
    assert "foo(X:1),foo(X:2)|bar(Y:1)" == rendered
Пример #12
0
def test_empty_extension_expression():
    """An empty expression dict must be rendered as an empty string."""
    gaf_writer = assocwriter.GafWriter(file=io.StringIO())

    rendered = gaf_writer._extension_expression({})
    assert rendered == ""
Пример #13
0
def test_full_taxon_empty_string_interacting_taxon():
    """An empty-string interacting taxon must leave only the primary taxon in the field."""
    gaf_writer = assocwriter.GafWriter(file=io.StringIO())

    rendered = gaf_writer._full_taxon_field("taxon:12345", "")
    assert "taxon:12345" == rendered
Пример #14
0
def test_full_taxon_field_interacting():
    """A primary and an interacting taxon must be joined with `|` in the taxon field."""
    gaf_writer = assocwriter.GafWriter(file=io.StringIO())

    rendered = gaf_writer._full_taxon_field("taxon:12345", "taxon:6789")
    assert "taxon:12345|taxon:6789" == rendered
Пример #15
0
def test_full_taxon_field_single_taxon():
    """A `None` interacting taxon must leave only the primary taxon in the field."""
    gaf_writer = assocwriter.GafWriter(file=io.StringIO())

    rendered = gaf_writer._full_taxon_field("taxon:12345", None)
    assert "taxon:12345" == rendered