def test_write_BII_I_1_investigation(BII_I_1_investigation_file, tmp_path):
    """Round-trip the BII-I-1 investigation through reader and writer.

    The fixture is parsed and validated (exactly one warning is expected),
    written to a temporary file, parsed again and written a second time.
    The two written files must have identical content.
    """
    # Parse and validate the fixture while recording warnings
    with pytest.warns(IsaWarning) as caught:
        reader = InvestigationReader.from_stream(BII_I_1_investigation_file)
        investigation = reader.read()
        InvestigationValidator(investigation).validate()

    # Exactly one parser warning about an empty ontology source is expected
    assert len(caught) == 1
    first_warning = caught[0]
    assert first_warning.category == ParseIsatabWarning
    assert str(first_warning.message) == "Skipping empty ontology source: , , , "

    # First write
    first_copy = tmp_path / "i_investigation.txt"
    with open(first_copy, "wt") as out_stream:
        writer = InvestigationWriter.from_stream(
            investigation, out_stream, lineterminator="\n")
        writer.write()

    # Parse what was just written
    with open(first_copy, "rt") as in_stream:
        investigation = InvestigationReader.from_stream(in_stream).read()

    # Second write
    second_copy = tmp_path / "i_investigation_2.txt"
    with open(second_copy, "wt") as out_stream:
        writer = InvestigationWriter.from_stream(
            investigation, out_stream, lineterminator="\n")
        writer.write()

    # Both written files must match byte for byte
    assert filecmp.cmp(first_copy, second_copy, shallow=False)
# Example no. 2
# 0
def test_assay_reader_minimal_assay_iostring2(minimal_investigation_file,
                                              minimal_assay_file):
    """Read the minimal assay from an in-memory stream without a file name."""
    # Investigation is parsed from a StringIO copy (parsing is tested elsewhere)
    investigation_stream = io.StringIO(minimal_investigation_file.read())
    investigation = InvestigationReader.from_stream(investigation_stream).read()
    with pytest.warns(IsaWarning) as caught:
        InvestigationValidator(investigation).validate()

    # Two validation warnings are expected
    assert len(caught) == 2

    # The assay reader gets a StringIO, i.e. a stream with no file name
    assay_stream = io.StringIO(minimal_assay_file.read())
    reader = AssayReader.from_stream("S1", "A1", assay_stream)
    assert len(reader.header) == 5

    # Parse the assay and validate it against the first study / first assay
    assay = reader.read()
    study_info = investigation.studies[0]
    AssayValidator(investigation, study_info, study_info.assays[0],
                   assay).validate()

    # Without a file name the reader falls back to the "<no file>" placeholder
    assert str(assay.file) == os.path.normpath("<no file>")
    assert len(assay.header) == 5
    assert len(assay.materials) == 3
    assert len(assay.processes) == 1
    assert len(assay.arcs) == 3
def test_write_full2_investigation(full2_investigation_file, tmp_path):
    """Parse, validate and rewrite the "full2" investigation fixture.

    Three validation warnings with fixed categories and messages are
    expected, and the rewritten file must equal the input fixture.
    """
    # Expected warnings, in the order they are recorded
    expected_warnings = (
        (
            CriticalIsaValidationWarning,
            "Study with incomplete minimal information (ID and path):\nID:\t\nTitle:\t\nPath:\t",
        ),
        (
            ModerateIsaValidationWarning,
            "Study without title:\nID:\t\nTitle:\t\nPath:\t",
        ),
        (
            CriticalIsaValidationWarning,
            "Assay with incomplete minimal information (path, measurement and technology type):\n"
            "Path:\t\n"
            "Measurement Type:\tmetabolite profiling\n"
            "Technology Type:\tmass spectrometry\n"
            "Technology Platform:\tLC-MS/MS",
        ),
    )

    # Parse and validate while recording warnings
    with pytest.warns(IsaWarning) as caught:
        reader = InvestigationReader.from_stream(full2_investigation_file)
        investigation = reader.read()
        InvestigationValidator(investigation).validate()

    # Compare the recorded warnings against the expectation table
    assert len(caught) == 3
    for position, (category, message) in enumerate(expected_warnings):
        assert caught[position].category == category
        assert str(caught[position].message) == message

    # Write the investigation to a temporary file
    written = tmp_path / "i_fullinvest2.txt"
    with open(written, "wt") as out_stream:
        writer = InvestigationWriter.from_stream(
            investigation, out_stream, lineterminator="\n")
        writer.write()

    # The output must reproduce the input file exactly
    assert filecmp.cmp(full2_investigation_file.name, written, shallow=False)
# Example no. 4
# 0
def run_warnings_caught(args):
    """Read and validate an investigation with its studies and assays.

    ``args.input_investigation_file`` is an open stream; it is read and then
    closed. Study and assay files are resolved relative to the directory of
    that stream's ``name``. Every study and assay file with a non-empty path
    is read first; validation of studies and assays happens in a second pass.
    """
    # Read the investigation and release the input stream
    investigation_stream = args.input_investigation_file
    investigation = InvestigationReader.from_stream(investigation_stream).read()
    investigation_stream.close()

    # Validate the investigation
    InvestigationValidator(investigation).validate()

    # First pass: read studies and assays (entries without a path are skipped)
    base_dir = os.path.normpath(os.path.dirname(investigation_stream.name))
    studies = {}
    assays = {}  # keyed by (study index, assay index)
    for s, study_info in enumerate(investigation.studies):
        study_id = "S{}".format(s + 1)
        if study_info.info.path:
            study_path = os.path.join(base_dir, study_info.info.path)
            with open(study_path, "rt") as inputf:
                studies[s] = StudyReader.from_stream(study_id, inputf).read()
        for a, assay_info in enumerate(study_info.assays):
            if not assay_info.path:
                continue
            assay_path = os.path.join(base_dir, assay_info.path)
            with open(assay_path, "rt") as inputf:
                assay_reader = AssayReader.from_stream(
                    study_id, "A{}".format(a + 1), inputf)
                assays[s, a] = assay_reader.read()

    # Second pass: validate everything that was read
    for s, study_info in enumerate(investigation.studies):
        if study_info.info.path:
            StudyValidator(investigation, study_info, studies[s]).validate()
        for a, assay_info in enumerate(study_info.assays):
            if not assay_info.path:
                continue
            AssayValidator(investigation, study_info, assay_info,
                           assays[s, a]).validate()
# Example no. 5
# 0
def test_study_reader_minimal_study_iostring2(minimal_investigation_file,
                                              minimal_study_file):
    """Read the minimal study from an in-memory stream without a file name."""
    # Investigation is parsed from a StringIO copy (parsing is tested elsewhere)
    investigation_stream = io.StringIO(minimal_investigation_file.read())
    investigation = InvestigationReader.from_stream(investigation_stream).read()
    with pytest.warns(IsaWarning) as caught:
        InvestigationValidator(investigation).validate()

    # Two validation warnings are expected
    assert len(caught) == 2

    # The study reader gets a StringIO, i.e. a stream with no file name
    study_stream = io.StringIO(minimal_study_file.read())
    reader = StudyReader.from_stream("S1", study_stream)
    assert len(reader.header) == 3

    # Parse the study and validate it against the first study entry
    study = reader.read()
    study_info = investigation.studies[0]
    StudyValidator(investigation, study_info, study).validate()

    # Without a file name the reader falls back to the "<no file>" placeholder
    assert str(study.file) == "<no file>"
    assert len(study.header) == 3
    assert len(study.materials) == 2
    assert len(study.processes) == 1
    assert len(study.arcs) == 2
def test_parse_assays_investigation(assays_investigation_file):
    """Parse the "assays" investigation and check warnings and structure.

    Five validation warnings are expected; the first four are compared
    against fixed categories and messages. The parsed investigation must
    contain two studies, neither of which declares an assay.
    """
    # Expected leading warnings, in the order they are recorded
    expected_warnings = (
        (
            CriticalIsaValidationWarning,
            "No assays declared in study 's_assays' of investigation 'i_assays.txt'",
        ),
        (
            CriticalIsaValidationWarning,
            "Study identifier used more than once: s_assays",
        ),
        (
            CriticalIsaValidationWarning,
            "Study path used more than once: s_assays.txt",
        ),
        (
            ModerateIsaValidationWarning,
            "Study title used more than once: Minimal Germline Study",
        ),
    )

    # Parse outside, validate inside the warning recorder
    investigation = InvestigationReader.from_stream(
        assays_investigation_file).read()
    with pytest.warns(IsaWarning) as caught:
        InvestigationValidator(investigation).validate()

    # Check warnings
    assert len(caught) == 5
    for position, (category, message) in enumerate(expected_warnings):
        assert caught[position].category == category
        assert str(caught[position].message) == message

    # Check the parsed structure
    assert investigation
    assert len(investigation.studies) == 2
    first_study, second_study = investigation.studies[0], investigation.studies[1]
    assert len(first_study.assays) == 0
    assert len(second_study.assays) == 0
# Example no. 7
# 0
def test_parse_warnings_investigation(warnings_investigation_file):
    """Parse the "warnings" investigation and check the emitted warnings.

    Validation must emit 15 warnings, including a fixed set of messages
    about invalid contact and publication data, and the parsed content is
    compared against the expected values.
    """
    # Parse outside, validate inside the warning recorder
    investigation = InvestigationReader.from_stream(
        warnings_investigation_file).read()
    with pytest.warns(IsaWarning) as caught:
        InvestigationValidator(investigation).validate()

    # Check warnings (messages are printed to ease debugging on failure)
    messages = [str(warning.message) for warning in caught]
    print(messages)
    assert len(caught) == 15
    for required_message in (
            "Invalid mail address: invalid_mail",
            "Invalid phone/fax number: CALL-ME",
            "Invalid phone/fax number: FAX-ME",
            "Invalid pubmed_id string: not-pubmed",
            "Invalid doi string: not-a-doi",
            "Assay path used more than once: a_warnings.txt",
    ):
        assert required_message in messages
    assert any(m.startswith("Assay without platform") for m in messages)
    incomplete_sources = sum(
        1 for m in messages if m.startswith("Incomplete ontology source"))
    assert incomplete_sources == 4

    # The investigation object itself must be truthy
    assert investigation

    # Ontology sources
    assert len(investigation.ontology_source_refs) == 5
    expected_obi = models.OntologyRef(
        "OBI",
        "http://data.bioontology.org/ontologies/OBI",
        "31",
        "Ontology for Biomedical Investigations",
        (),
        [*investigation_headers.ONTOLOGY_SOURCE_REF_KEYS],
    )
    assert expected_obi == investigation.ontology_source_refs["OBI"]

    # Basic info
    assert "Investigation with Warnings" == investigation.info.title
    assert "i_warnings" == investigation.info.identifier

    # Studies
    assert len(investigation.studies) == 1
    study_info = investigation.studies[0]
    assert "s_warnings" == study_info.info.identifier
    assert "Germline Study with Warnings" == study_info.info.title
    assert Path("s_warnings.txt") == study_info.info.path

    # Assays
    assert len(study_info.assays) == 2
    first_assay = study_info.assays[0]
    assert Path("a_warnings.txt") == first_assay.path

    # Study contacts
    assert len(study_info.contacts) == 0
# Example no. 8
# 0
def test_parse_only_investigation(only_investigation_file):
    """Validate an investigation without studies; one critical warning expected."""
    # Parse outside, validate inside the warning recorder
    investigation = InvestigationReader.from_stream(
        only_investigation_file).read()
    with pytest.warns(IsaWarning) as caught:
        InvestigationValidator(investigation).validate()

    # Exactly one warning: no studies are declared
    assert len(caught) == 1
    only_warning = caught[0]
    assert only_warning.category == CriticalIsaValidationWarning
    assert str(only_warning.message) == (
        "No studies declared in investigation: i_onlyinvest.txt")
def test_write_comment_investigation(comment_investigation_file, tmp_path):
    """Parse and rewrite the "comments" investigation; output must equal input."""
    # Parse and validate the fixture
    reader = InvestigationReader.from_stream(comment_investigation_file)
    investigation = reader.read()
    InvestigationValidator(investigation).validate()

    # Write the investigation to a temporary file
    written = tmp_path / "i_comments.txt"
    with open(written, "wt") as out_stream:
        writer = InvestigationWriter.from_stream(
            investigation, out_stream, lineterminator="\n")
        writer.write()

    # The output must reproduce the input file exactly
    assert filecmp.cmp(comment_investigation_file.name, written, shallow=False)
# Example no. 10
# 0
def test_write_assays_investigation(assays_investigation_file, tmp_path):
    """Round-trip the "assays" investigation and count warnings at each step.

    Validation is expected to emit 5 warnings and writing 12 warnings, both
    for the original fixture and for the re-read copy. The two written files
    must have identical content.
    """

    def _validate_and_count(parsed):
        # Validate ``parsed`` and return the number of recorded warnings
        with pytest.warns(IsaWarning) as emitted:
            InvestigationValidator(parsed).validate()
        return len(emitted)

    def _write_and_count(parsed, target):
        # Write ``parsed`` to ``target`` and return the number of warnings
        with pytest.warns(IsaWarning) as emitted:
            with open(target, "wt") as out_stream:
                InvestigationWriter.from_stream(
                    parsed, out_stream, lineterminator="\n").write()
        return len(emitted)

    # Parse and validate the fixture
    investigation = InvestigationReader.from_stream(
        assays_investigation_file).read()
    assert _validate_and_count(investigation) == 5

    # First write
    first_copy = tmp_path / "i_assays.txt"
    assert _write_and_count(investigation, first_copy) == 12

    # Re-read the written file and validate again
    with open(first_copy, "rt") as in_stream:
        investigation = InvestigationReader.from_stream(in_stream).read()
    assert _validate_and_count(investigation) == 5

    # Second write
    second_copy = tmp_path / "i_assays_2.txt"
    assert _write_and_count(investigation, second_copy) == 12

    # Both written files must match byte for byte
    assert filecmp.cmp(first_copy, second_copy, shallow=False)
# Example no. 11
# 0
def _parse_write_assert(investigation_file, tmp_path, quote=None):
    """Round-trip every study of an investigation and compare with its input.

    Each study file referenced by the investigation is read from the
    directory of ``investigation_file``, validated, written below
    ``tmp_path`` and compared with the original file. ``quote`` is passed
    through to ``StudyWriter.from_stream``.
    """
    # Load and validate the investigation
    investigation = InvestigationReader.from_stream(investigation_file).read()
    InvestigationValidator(investigation).validate()
    source_dir = os.path.normpath(os.path.dirname(investigation_file.name))

    for index, study_info in enumerate(investigation.studies):
        relative_path = study_info.info.path

        # Load and validate the study
        source_path = os.path.join(source_dir, relative_path)
        with open(source_path, "rt") as inputf:
            study_reader = StudyReader.from_stream(
                "S{}".format(index + 1), inputf)
            study = study_reader.read()
        StudyValidator(investigation, study_info, study).validate()

        # Write the study to a temporary file and compare with the input
        target_path = tmp_path / relative_path
        with open(target_path, "wt", newline="") as out_stream:
            StudyWriter.from_stream(study, out_stream, quote=quote).write()
        assert filecmp.cmp(source_path, target_path, shallow=False)
# Example no. 12
# 0
def _parse_write_assert_assay(investigation_file,
                              tmp_path,
                              quote=None,
                              normalize=False,
                              skip=None):
    """Round-trip every assay of an investigation and compare sorted files.

    Each assay file is read, validated and written below ``tmp_path``. With
    ``normalize`` set, the written file is read and written once more (with
    a ``_b`` suffix) and the comparison is made between these two written
    files instead of against the original input. Files are compared after
    passing both through ``sort_file``. ``quote`` is passed through to
    ``AssayWriter.from_stream``; assays whose path (as string) is contained
    in ``skip`` are ignored.
    """
    # Load and validate the investigation
    investigation = InvestigationReader.from_stream(investigation_file).read()
    InvestigationValidator(investigation).validate()
    source_dir = os.path.normpath(os.path.dirname(investigation_file.name))

    def _load(source, study_no, assay_no, study_info, assay_info):
        # Read one assay file and validate it after the file is closed
        with open(source, "rt") as inputf:
            loaded = AssayReader.from_stream("S{}".format(study_no),
                                             "A{}".format(assay_no),
                                             inputf).read()
        AssayValidator(investigation, study_info, assay_info,
                       loaded).validate()
        return loaded

    def _dump(assay, target):
        # Write one assay to ``target``
        with open(target, "wt", newline="") as out_stream:
            AssayWriter.from_stream(assay, out_stream, quote=quote).write()

    for s, study_info in enumerate(investigation.studies):
        for a, assay_info in enumerate(study_info.assays):
            if skip and str(assay_info.path) in skip:
                continue

            # Read the original assay and write it to a temporary file
            path_in = os.path.join(source_dir, assay_info.path)
            assay = _load(path_in, s + 1, a + 1, study_info, assay_info)
            path_out = tmp_path / assay_info.path
            _dump(assay, path_out)

            if normalize:
                # The first output becomes the reference; write a second copy
                path_in = path_out
                assay = _load(path_in, s + 1, a + 1, study_info, assay_info)
                path_out = tmp_path / (assay_info.path.name + "_b")
                _dump(assay, path_out)

            # Sort both files and compare the sorted versions
            sorted_in = sort_file(
                path_in, tmp_path / (assay_info.path.name + ".in.sorted"))
            sorted_out = sort_file(
                path_out, tmp_path / (assay_info.path.name + ".out.sorted"))
            assert filecmp.cmp(sorted_in, sorted_out, shallow=False)
# Example no. 13
# 0
def test_parse_minimal_investigation(minimal_investigation_file):
    """Parse the minimal investigation and check its content.

    Validation is expected to emit two warnings; ontology sources, basic
    info, the single study and its single assay are compared against fixed
    expected values.
    """
    # Parse outside, validate inside the warning recorder
    investigation = InvestigationReader.from_stream(
        minimal_investigation_file).read()
    with pytest.warns(IsaWarning) as caught:
        InvestigationValidator(investigation).validate()

    # Check warnings
    assert len(caught) == 2

    # The investigation object itself must be truthy
    assert investigation

    # Ontology sources
    assert len(investigation.ontology_source_refs) == 1
    expected_obi = models.OntologyRef(
        "OBI",
        "http://data.bioontology.org/ontologies/OBI",
        "31",
        "Ontology for Biomedical Investigations",
        (),
        [*investigation_headers.ONTOLOGY_SOURCE_REF_KEYS],
    )
    assert expected_obi == investigation.ontology_source_refs["OBI"]

    # Basic info
    assert "Minimal Investigation" == investigation.info.title
    assert "i_minimal" == investigation.info.identifier

    # Studies
    assert len(investigation.studies) == 1
    study_info = investigation.studies[0]
    assert "s_minimal" == study_info.info.identifier
    assert "Minimal Germline Study" == study_info.info.title
    assert Path("s_minimal.txt") == study_info.info.path

    # Assays
    assert len(study_info.assays) == 1
    first_assay = study_info.assays[0]
    assert Path("a_minimal.txt") == first_assay.path

    # Study contacts
    assert len(study_info.contacts) == 0
# Example no. 14
# 0
def test_parse_assays_investigation(assays_investigation_file):
    """Parse the "assays" investigation and check warnings and structure.

    Reading and validating together are expected to emit seven warnings; the
    first six are compared against fixed categories and messages. The parsed
    investigation must contain two studies, neither of which declares an
    assay.
    """
    # Expected leading warnings, in the order they are recorded
    expected_warnings = (
        (
            ParseIsatabWarning,
            "Removed trailing whitespaces in fields of line: ['Study Identifier', 's_assays ']",
        ),
        (
            ParseIsatabWarning,
            "Removed trailing whitespaces in fields of line: ['Study Title', ' Minimal Germline Study']",
        ),
        (
            AdvisoryIsaValidationWarning,
            "No assays declared in study 's_assays' of investigation 'i_assays.txt'",
        ),
        (
            CriticalIsaValidationWarning,
            "Study identifier used more than once: s_assays",
        ),
        (
            CriticalIsaValidationWarning,
            "Study path used more than once: s_assays.txt",
        ),
        (
            ModerateIsaValidationWarning,
            "Study title used more than once: Minimal Germline Study",
        ),
    )

    # Show every warning, then read and validate inside the recorder
    warnings.simplefilter("always")
    reader = InvestigationReader.from_stream(assays_investigation_file)
    with pytest.warns(IsaWarning) as caught:
        investigation = reader.read()
        InvestigationValidator(investigation).validate()

    # Check warnings
    assert len(caught) == 7
    for position, (category, message) in enumerate(expected_warnings):
        assert caught[position].category == category
        assert str(caught[position].message) == message

    # Check the parsed structure
    assert investigation
    assert len(investigation.studies) == 2
    first_study, second_study = investigation.studies[0], investigation.studies[1]
    assert len(first_study.assays) == 0
    assert len(second_study.assays) == 0
# Example no. 15
# 0
def test_write_minimal_investigation(minimal_investigation_file, tmp_path):
    """Parse and rewrite the minimal investigation; output must equal input.

    Reading plus validating is expected to emit one warning and writing six
    warnings, the fourth of which concerns missing reference headers for the
    STUDY PUBLICATIONS section.
    """
    # Parse and validate while recording warnings
    with pytest.warns(IsaWarning) as caught:
        reader = InvestigationReader.from_stream(minimal_investigation_file)
        investigation = reader.read()
        InvestigationValidator(investigation).validate()
    assert len(caught) == 1

    # Write the investigation to a temporary file, recording warnings again
    written = tmp_path / "i_minimal.txt"
    with pytest.warns(IsaWarning) as caught:
        with open(written, "wt") as out_stream:
            writer = InvestigationWriter.from_stream(
                investigation, out_stream, lineterminator="\n")
            writer.write()

    # Check the writer warnings
    assert len(caught) == 6
    fourth_warning = caught[3]
    assert fourth_warning.category == WriteIsatabWarning
    assert str(fourth_warning.message) == (
        "No reference headers available for section STUDY PUBLICATIONS. Applying default order.")

    # The output must reproduce the input file exactly
    assert filecmp.cmp(minimal_investigation_file.name, written, shallow=False)
# Example no. 16
# 0
def test_write_full_investigation(full_investigation_file, tmp_path):
    """Round-trip the "full" investigation through reader and writer.

    The fixture is parsed, validated and written; the written file is parsed
    again and written a second time. Both written files must have identical
    content.
    """
    # Parse and validate the fixture
    reader = InvestigationReader.from_stream(full_investigation_file)
    investigation = reader.read()
    InvestigationValidator(investigation).validate()

    # First write
    first_copy = tmp_path / "i_fullinvest.txt"
    with open(first_copy, "wt") as out_stream:
        writer = InvestigationWriter.from_stream(
            investigation, out_stream, lineterminator="\n")
        writer.write()

    # Parse what was just written
    with open(first_copy, "rt") as in_stream:
        investigation = InvestigationReader.from_stream(in_stream).read()

    # Second write
    second_copy = tmp_path / "i_fullinvest_2.txt"
    with open(second_copy, "wt") as out_stream:
        writer = InvestigationWriter.from_stream(
            investigation, out_stream, lineterminator="\n")
        writer.write()

    # Both written files must match byte for byte
    assert filecmp.cmp(first_copy, second_copy, shallow=False)
# Example no. 17
# 0
def test_assay_reader_minimal_assay(minimal_investigation_file,
                                    minimal_assay_file):
    """Parse the minimal assay file with ``AssayReader`` and check the model.

    ``AssayReader`` yields a complete ``Assay`` object instead of the
    row-wise nodes that ``AssayRowReader`` produces.
    """
    # Investigation parsing itself is covered by other tests
    investigation = InvestigationReader.from_stream(
        minimal_investigation_file).read()
    with pytest.warns(IsaWarning) as caught:
        InvestigationValidator(investigation).validate()

    # One validation warning is expected
    assert len(caught) == 1

    # Header is available right after creating the reader
    reader = AssayReader.from_stream("S1", "A1", minimal_assay_file)
    assert len(reader.header) == 5

    # Parse the assay and validate it against the first study / first assay
    assay = reader.read()
    study_info = investigation.studies[0]
    AssayValidator(investigation, study_info, study_info.assays[0],
                   assay).validate()

    # Overall shape of the result
    actual_path = os.path.normpath(str(assay.file))
    assert actual_path.endswith(os.path.normpath("data/i_minimal/a_minimal.txt"))
    assert len(assay.header) == 5
    assert len(assay.materials) == 3
    assert len(assay.processes) == 1
    assert len(assay.arcs) == 3

    # Identifiers of the nodes expected in the assay graph
    sample_id = "S1-sample-0815-N1"
    read1_id = "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4"
    read2_id = "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5"
    sequencing_id = "S1-A1-0815-N1-DNA1-WES1-3"

    # Materials
    expected_sample = models.Material(
        "Sample Name", sample_id, "0815-N1",
        None, (), (), (), None,
        [table_headers.SAMPLE_NAME],
    )
    assert expected_sample == assay.materials[sample_id]

    expected_read1 = models.Material(
        "Raw Data File", read1_id, "0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
        None, (), (), (), None,
        [table_headers.RAW_DATA_FILE],
    )
    assert expected_read1 == assay.materials[read1_id]

    expected_read2 = models.Material(
        "Raw Data File", read2_id, "0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
        None, (), (), (), None,
        [table_headers.RAW_DATA_FILE],
    )
    assert expected_read2 == assay.materials[read2_id]

    # Processes
    expected_sequencing = models.Process(
        "nucleic acid sequencing", sequencing_id, "0815-N1-DNA1-WES1",
        "Assay Name",
        None, None, (), (), None, None, None,
        [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )
    assert expected_sequencing == assay.processes[sequencing_id]

    # Arcs: sample -> sequencing -> first read file -> second read file
    expected_arcs = (
        models.Arc(sample_id, sequencing_id),
        models.Arc(sequencing_id, read1_id),
        models.Arc(read1_id, read2_id),
    )
    assert expected_arcs == assay.arcs
# Example no. 18
# 0
def test_assay_reader_gelelect(gelelect_investigation_file,
                               gelelect_assay_file):
    """Parse the gel electrophoresis assay with ``AssayReader``.

    Reading and validating the investigation and the assay together are
    expected to emit four warnings. Selected materials and processes are
    compared against fixed expected objects.
    """
    with pytest.warns(IsaWarning) as caught:
        # Load and validate the investigation
        investigation = InvestigationReader.from_stream(
            gelelect_investigation_file).read()
        InvestigationValidator(investigation).validate()

        # Header is available right after creating the reader
        reader = AssayReader.from_stream("S1", "A1", gelelect_assay_file)
        assert len(reader.header) == 22

        # Parse the assay and validate it against the first study / assay
        assay = reader.read()
        study_info = investigation.studies[0]
        AssayValidator(investigation, study_info, study_info.assays[0],
                       assay).validate()

    # Check warnings
    assert len(caught) == 4

    # Overall shape of the result
    actual_path = os.path.normpath(str(assay.file))
    assert actual_path.endswith(
        os.path.normpath(
            "data/test_gelelect/a_study01_protein_expression_profiling_gel_electrophoresis.txt"
        ))
    assert len(assay.header) == 22
    assert len(assay.materials) == 9
    assert len(assay.processes) == 10
    assert len(assay.arcs) == 18

    # Image file material
    image_id = "S1-A1-Image01.jpeg-COL19"
    expected_image = models.Material(
        "Image File", image_id, "Image01.jpeg",
        None, (), (), (), None,
        [table_headers.IMAGE_FILE],
    )
    assert expected_image == assay.materials[image_id]

    # Scan process
    scan_id = "S1-A1-Scan02-18"
    expected_scan = models.Process(
        "data collection", scan_id, "Scan02", "Scan Name",
        None, None, (), (), None, None, None,
        [table_headers.PROTOCOL_REF, table_headers.SCAN_NAME],
    )
    assert expected_scan == assay.processes[scan_id]

    # Both electrophoresis processes share the same header list
    header_electrophoresis = [
        table_headers.PROTOCOL_REF,
        table_headers.GEL_ELECTROPHORESIS_ASSAY_NAME,
        table_headers.FIRST_DIMENSION,
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
        table_headers.SECOND_DIMENSION,
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
    ]

    # Named electrophoresis process with empty dimension terms
    named_id = "S1-A1-Assay01-10"
    expected_named = models.Process(
        "electrophoresis", named_id, "Assay01",
        "Gel Electrophoresis Assay Name",
        None, None, (), (), None,
        models.OntologyTermRef("", "", ""),
        models.OntologyTermRef("", "", ""),
        header_electrophoresis,
    )
    assert expected_named == assay.processes[named_id]

    # Unnamed electrophoresis process with dimension terms set
    unnamed_id = "S1-A1-electrophoresis-9-2"
    expected_unnamed = models.Process(
        "electrophoresis", unnamed_id, "",
        "Gel Electrophoresis Assay Name",
        None, None, (), (), None,
        models.OntologyTermRef("AssayX", None, None),
        models.OntologyTermRef("AssayY", None, None),
        header_electrophoresis,
    )
    assert expected_unnamed == assay.processes[unnamed_id]
# Example no. 19
# 0
def test_assay_reader_small2_assay(small2_investigation_file,
                                   small2_assay_file):
    """Use ``AssayReader`` to read in the "small2" assay file.

    The investigation is read and validated first, then the assay is read
    and validated against the first study and its first assay. The test
    checks the header/material/process/arc counts, one material comment and
    the complete set of arcs.
    """
    # Load investigation (tested elsewhere)
    investigation = InvestigationReader.from_stream(
        small2_investigation_file).read()
    InvestigationValidator(investigation).validate()

    # Create new row reader and check read headers
    reader = AssayReader.from_stream("S1", "A1", small2_assay_file)
    assert 14 == len(reader.header)

    # Read assay and validate it against the first study / first assay
    assay = reader.read()
    AssayValidator(investigation, investigation.studies[0],
                   investigation.studies[0].assays[0], assay).validate()

    # Check results (path is normalized so the suffix check is OS independent)
    assert os.path.normpath(str(assay.file)).endswith(
        os.path.normpath("data/i_small2/a_small2.txt"))
    assert 14 == len(assay.header)
    assert 25 == len(assay.materials)
    assert 41 == len(assay.processes)
    assert 74 == len(assay.arcs)

    # Comments: first comment of one labeled extract material
    expected = models.Comment(name="Replicate", value="B")
    assert assay.materials["S1-A1-0815-T1-Pro1-B-115-COL5"].comments[
        0] == expected

    # Expected arcs (74 entries, matching the arc count asserted above)
    expected = (
        models.Arc("S1-sample-0815-N1", "S1-A1-extraction-2-1"),
        models.Arc("S1-sample-0815-T1", "S1-A1-extraction-2-2"),
        models.Arc("S1-A1-extraction-2-1", "S1-A1-0815-N1-Pro1-COL3"),
        models.Arc("S1-A1-extraction-2-2", "S1-A1-0815-T1-Pro1-COL3"),
        models.Arc("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-1"),
        models.Arc("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-2"),
        models.Arc("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-3"),
        models.Arc("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-4"),
        models.Arc("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-5"),
        models.Arc("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-6"),
        models.Arc("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-7"),
        models.Arc("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-8"),
        models.Arc("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-9"),
        models.Arc("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-10"),
        models.Arc("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-11"),
        models.Arc("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-12"),
        models.Arc("S1-A1-labeling-4-1", "S1-A1-0815-N1-Pro1-A-114-COL5"),
        models.Arc("S1-A1-labeling-4-2", "S1-A1-0815-T1-Pro1-A-115-COL5"),
        models.Arc("S1-A1-labeling-4-3", "S1-A1-0815-N1-Pro1-B-114-COL5"),
        models.Arc("S1-A1-labeling-4-4", "S1-A1-0815-T1-Pro1-B-115-COL5"),
        models.Arc("S1-A1-labeling-4-5", "S1-A1-0815-N1-Pro1-C-114-COL5"),
        models.Arc("S1-A1-labeling-4-6", "S1-A1-0815-T1-Pro1-C-115-COL5"),
        models.Arc("S1-A1-labeling-4-7", "S1-A1-0815-N1-Pro1-D-114-COL5"),
        models.Arc("S1-A1-labeling-4-8", "S1-A1-0815-T1-Pro1-D-115-COL5"),
        models.Arc("S1-A1-labeling-4-9", "S1-A1-0815-N1-Pro1-E-114-COL5"),
        models.Arc("S1-A1-labeling-4-10", "S1-A1-0815-T1-Pro1-E-115-COL5"),
        models.Arc("S1-A1-labeling-4-11", "S1-A1-0815-N1-Pro1-F-114-COL5"),
        models.Arc("S1-A1-labeling-4-12", "S1-A1-0815-T1-Pro1-F-115-COL5"),
        models.Arc("S1-A1-0815-N1-Pro1-A-114-COL5",
                   "S1-A1-chromatography-8-1"),
        models.Arc("S1-A1-0815-T1-Pro1-A-115-COL5",
                   "S1-A1-chromatography-8-2"),
        models.Arc("S1-A1-0815-N1-Pro1-B-114-COL5",
                   "S1-A1-chromatography-8-3"),
        models.Arc("S1-A1-0815-T1-Pro1-B-115-COL5",
                   "S1-A1-chromatography-8-4"),
        models.Arc("S1-A1-0815-N1-Pro1-C-114-COL5",
                   "S1-A1-chromatography-8-5"),
        models.Arc("S1-A1-0815-T1-Pro1-C-115-COL5",
                   "S1-A1-chromatography-8-6"),
        models.Arc("S1-A1-0815-N1-Pro1-D-114-COL5",
                   "S1-A1-chromatography-8-7"),
        models.Arc("S1-A1-0815-T1-Pro1-D-115-COL5",
                   "S1-A1-chromatography-8-8"),
        models.Arc("S1-A1-0815-N1-Pro1-E-114-COL5",
                   "S1-A1-chromatography-8-9"),
        models.Arc("S1-A1-0815-T1-Pro1-E-115-COL5",
                   "S1-A1-chromatography-8-10"),
        models.Arc("S1-A1-0815-N1-Pro1-F-114-COL5",
                   "S1-A1-chromatography-8-11"),
        models.Arc("S1-A1-0815-T1-Pro1-F-115-COL5",
                   "S1-A1-chromatography-8-12"),
        models.Arc("S1-A1-chromatography-8-1", "S1-A1-poolA-10"),
        models.Arc("S1-A1-chromatography-8-2", "S1-A1-poolA-10"),
        models.Arc("S1-A1-chromatography-8-3", "S1-A1-mass spectrometry-9-3"),
        models.Arc("S1-A1-chromatography-8-4", "S1-A1-mass spectrometry-9-4"),
        models.Arc("S1-A1-chromatography-8-5", "S1-A1-poolC-10"),
        models.Arc("S1-A1-chromatography-8-6", "S1-A1-poolC-10"),
        models.Arc("S1-A1-chromatography-8-7", "S1-A1-mass spectrometry-9-7"),
        models.Arc("S1-A1-chromatography-8-8", "S1-A1-mass spectrometry-9-8"),
        models.Arc("S1-A1-chromatography-8-9", "S1-A1-poolE-10"),
        models.Arc("S1-A1-chromatography-8-10", "S1-A1-poolE-10"),
        models.Arc("S1-A1-chromatography-8-11", "S1-A1-poolF-10"),
        models.Arc("S1-A1-chromatography-8-12", "S1-A1-poolF-10"),
        models.Arc("S1-A1-poolA-10", "S1-A1-poolA.raw-COL11"),
        models.Arc("S1-A1-mass spectrometry-9-3", "S1-A1-poolB.raw-COL11"),
        models.Arc("S1-A1-mass spectrometry-9-4", "S1-A1-poolB.raw-COL11"),
        models.Arc("S1-A1-poolC-10",
                   "S1-A1-Empty Raw Spectral Data File-11-5"),
        models.Arc("S1-A1-mass spectrometry-9-7",
                   "S1-A1-Empty Raw Spectral Data File-11-7"),
        models.Arc("S1-A1-mass spectrometry-9-8",
                   "S1-A1-Empty Raw Spectral Data File-11-8"),
        models.Arc("S1-A1-poolE-10", "S1-A1-poolE.raw-COL11"),
        models.Arc("S1-A1-poolF-10",
                   "S1-A1-Empty Raw Spectral Data File-11-11"),
        models.Arc("S1-A1-poolA.raw-COL11", "S1-A1-data transformation-12-1"),
        models.Arc("S1-A1-poolB.raw-COL11", "S1-A1-data transformation-12-3"),
        models.Arc("S1-A1-Empty Raw Spectral Data File-11-5",
                   "S1-A1-data transformation-12-5"),
        models.Arc("S1-A1-Empty Raw Spectral Data File-11-7",
                   "S1-A1-data transformation-12-7"),
        models.Arc("S1-A1-Empty Raw Spectral Data File-11-8",
                   "S1-A1-data transformation-12-8"),
        models.Arc("S1-A1-poolE.raw-COL11", "S1-A1-data transformation-12-9"),
        models.Arc("S1-A1-Empty Raw Spectral Data File-11-11",
                   "S1-A1-data analysis-13"),
        models.Arc("S1-A1-data transformation-12-1",
                   "S1-A1-results.csv-COL14"),
        models.Arc("S1-A1-data transformation-12-3",
                   "S1-A1-results.csv-COL14"),
        models.Arc("S1-A1-data transformation-12-5",
                   "S1-A1-results.csv-COL14"),
        models.Arc("S1-A1-data transformation-12-7",
                   "S1-A1-results.csv-COL14"),
        models.Arc("S1-A1-data transformation-12-8",
                   "S1-A1-results.csv-COL14"),
        models.Arc("S1-A1-data transformation-12-9",
                   "S1-A1-Empty Derived Data File-14-9"),
        models.Arc("S1-A1-data analysis-13", "S1-A1-results.csv-COL14"),
    )
    # Both sides are sorted, so the order in which arcs are stored is not
    # part of what this test checks
    assert sorted(expected) == sorted(assay.arcs)
# Example no. 20
# 0
def test_assay_reader_small_assay(small_investigation_file, small_assay_file):
    """Read the small assay file with ``AssayReader`` and check its content.

    The investigation is parsed and validated first, since the
    ``AssayValidator`` takes it as input.  Afterwards the header size, the
    node and arc counts, selected materials and processes, and the complete
    arc tuple of the parsed assay are compared with hand-written
    expectations.
    """

    def plain_material(kind, key, label, header):
        # Expected ``Material`` built from its first three positional fields
        # and a single header; every other field is empty or ``None``.
        return models.Material(kind, key, label, None, (), (), (), None,
                               [header])

    def plain_process(protocol, key, label, label_type, headers):
        # Expected ``Process`` built from its first four positional fields
        # and its headers; every other field is empty or ``None``.
        return models.Process(protocol, key, label, label_type, None, None,
                              (), (), None, None, None, list(headers))

    # Load investigation (tested elsewhere); the read itself happens outside
    # the ``pytest.warns`` block, only validation is recorded.
    parsed_investigation = InvestigationReader.from_stream(
        small_investigation_file).read()
    with pytest.warns(IsaWarning) as validation_warnings:
        InvestigationValidator(parsed_investigation).validate()
    assert 1 == len(validation_warnings)

    # Create the assay reader and check the header it exposes before reading
    assay_reader = AssayReader.from_stream("S1", "A1", small_assay_file)
    assert 9 == len(assay_reader.header)

    # Read and validate the assay, recording warnings from both steps
    with pytest.warns(IsaWarning) as assay_warnings:
        assay = assay_reader.read()
        study_info = parsed_investigation.studies[0]
        AssayValidator(parsed_investigation, study_info,
                       study_info.assays[0], assay).validate()
    assert 1 == len(assay_warnings)

    # File location and overall sizes
    expected_suffix = os.path.normpath("data/i_small/a_small.txt")
    assert os.path.normpath(str(assay.file)).endswith(expected_suffix)
    assert 9 == len(assay.header)
    assert 9 == len(assay.materials)
    assert 5 == len(assay.processes)
    assert 13 == len(assay.arcs)

    # Materials: each row is (type, key in ``assay.materials``, name, header)
    material_rows = (
        ("Sample Name", "S1-sample-0815-N1", "0815-N1",
         table_headers.SAMPLE_NAME),
        ("Sample Name", "S1-sample-0815-T1", "0815-T1",
         table_headers.SAMPLE_NAME),
        ("Raw Data File",
         "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL6",
         "0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
         table_headers.RAW_DATA_FILE),
        ("Raw Data File",
         "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
         "0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
         table_headers.RAW_DATA_FILE),
        ("Raw Data File",
         "S1-A1-0815-T1-DNA1-WES1_L???_???_R1.fastq.gz-COL6",
         "0815-T1-DNA1-WES1_L???_???_R1.fastq.gz",
         table_headers.RAW_DATA_FILE),
        ("Raw Data File",
         "S1-A1-0815-T1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
         "0815-T1-DNA1-WES1_L???_???_R2.fastq.gz",
         table_headers.RAW_DATA_FILE),
        ("Derived Data File", "S1-A1-0815-somatic.vcf.gz-COL9",
         "0815-somatic.vcf.gz", table_headers.DERIVED_DATA_FILE),
    )
    for kind, key, label, header in material_rows:
        assert plain_material(kind, key, label,
                              header) == assay.materials[key]

    # Processes: each row is (protocol, key in ``assay.processes``, name,
    # name type, headers)
    process_rows = (
        ("library preparation", "S1-A1-library preparation-2-1", None, None,
         (table_headers.PROTOCOL_REF, )),
        ("library preparation", "S1-A1-library preparation-2-2", None, None,
         (table_headers.PROTOCOL_REF, )),
        ("nucleic acid sequencing", "S1-A1-0815-N1-DNA1-WES1-5",
         "0815-N1-DNA1-WES1", "Assay Name",
         (table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME)),
        ("nucleic acid sequencing", "S1-A1-0815-T1-DNA1-WES1-5",
         "0815-T1-DNA1-WES1", "Assay Name",
         (table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME)),
    )
    for protocol, key, label, label_type, headers in process_rows:
        assert plain_process(protocol, key, label, label_type,
                             headers) == assay.processes[key]

    # Arcs: the expected tuple is the sequence of consecutive node pairs
    # along the N1 chain followed by those along the T1 chain.  Both chains
    # run into the same variant calling process; only the first one lists
    # the arc from that process to the VCF file.
    n1_chain = (
        "S1-sample-0815-N1",
        "S1-A1-library preparation-2-1",
        "S1-A1-0815-N1-DNA1-COL3",
        "S1-A1-0815-N1-DNA1-WES1-5",
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL6",
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
        "S1-A1-somatic variant calling-1-8",
        "S1-A1-0815-somatic.vcf.gz-COL9",
    )
    t1_chain = (
        "S1-sample-0815-T1",
        "S1-A1-library preparation-2-2",
        "S1-A1-0815-T1-DNA1-COL3",
        "S1-A1-0815-T1-DNA1-WES1-5",
        "S1-A1-0815-T1-DNA1-WES1_L???_???_R1.fastq.gz-COL6",
        "S1-A1-0815-T1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
        "S1-A1-somatic variant calling-1-8",
    )
    expected_arcs = tuple(
        models.Arc(tail, head) for chain in (n1_chain, t1_chain)
        for tail, head in zip(chain, chain[1:]))
    assert expected_arcs == assay.arcs
# Example no. 21
# 0
def test_parse_full_investigation(full_investigation_file):
    """Parse the full investigation file and check every parsed section.

    The stream supplied by ``full_investigation_file`` is read with
    ``InvestigationReader``, passed through ``InvestigationValidator`` and
    then compared section by section (ontology sources, basic info,
    publications, contacts and both studies) against hand-built ``models``
    objects.  Each ``expected_headers`` list spells out the header order the
    parsed object is expected to carry, including where ``Comment[...]``
    entries sit relative to the standard keys.
    """
    # Read Investigation from file-like object
    reader = InvestigationReader.from_stream(full_investigation_file)
    investigation = reader.read()
    InvestigationValidator(investigation).validate()

    # Check results
    # Investigation
    assert investigation

    # Ontology sources
    assert 9 == len(investigation.ontology_source_refs)
    # The comment header is expected after all standard ontology source keys
    expected_headers = [
        *investigation_headers.ONTOLOGY_SOURCE_REF_KEYS, "Comment[Test]"
    ]
    expected = models.OntologyRef(
        "OBI",
        "http://data.bioontology.org/ontologies/OBI",
        "21",
        "Ontology for Biomedical Investigations",
        (models.Comment("Test", "4"), ),
        expected_headers,
    )
    assert expected == investigation.ontology_source_refs["OBI"]
    expected = models.OntologyRef(
        "NCBITAXON",
        "http://data.bioontology.org/ontologies/NCBITAXON",
        "2",
        ("National Center for Biotechnology Information (NCBI) Organismal "
         "Classification"),
        (models.Comment("Test", "1"), ),
        expected_headers,
    )
    assert expected == investigation.ontology_source_refs["NCBITAXON"]

    # Basic info
    assert ("Growth control of the eukaryote cell: a systems biology study "
            "in yeast") == investigation.info.title
    assert "BII-I-1" == investigation.info.identifier
    assert date(2007, 4, 30) == investigation.info.submission_date
    assert date(2009, 3, 10) == investigation.info.public_release_date

    # Only the header list is checked for the comments of the basic info;
    # all six comment headers are expected after the standard keys
    expected_headers = [
        *investigation_headers.INVESTIGATION_INFO_KEYS,
        "Comment[Created With Configuration]",
        "Comment[Last Opened With Configuration]",
        "Comment[Owning Organisation URI]",
        "Comment[Consortium URI]",
        "Comment[Principal Investigator URI]",
        "Comment[Investigation Keywords]",
    ]
    assert expected_headers == investigation.info.headers

    # Publications (only the first two of three are compared in detail)
    assert 3 == len(investigation.publications)
    # "Comment[Subtitle]" is expected between the fourth and fifth standard
    # publication key
    expected_headers = [
        *investigation_headers.INVESTIGATION_PUBLICATIONS_KEYS[0:4],
        "Comment[Subtitle]",
        *investigation_headers.INVESTIGATION_PUBLICATIONS_KEYS[4:],
    ]
    expected = models.PublicationInfo(
        "17439666",
        "doi:10.1186/jbiol54",
        "Castrillo JI, Zeef LA, Hoyle DC, Zhang N, Hayes A, Gardner DC, "
        "Cornell MJ, Petty J, Hakes L, Wardleworth L, Rash B, Brown M, "
        "Dunn WB, Broadhurst D, O'Donoghue K, Hester SS, Dunkley TP, Hart "
        "SR, Swainston N, Li P, Gaskell SJ, Paton NW, Lilley KS, Kell DB, "
        "Oliver SG.",
        "Growth control of the eukaryote cell: a systems biology study in "
        "yeast.",
        models.OntologyTermRef("indexed in Pubmed", "", ""),
        (models.Comment("Subtitle", ""), ),
        expected_headers,
    )
    assert expected == investigation.publications[0]
    expected = models.PublicationInfo(
        "1231222",
        "",
        "Piatnochka IT.",
        "Effect of prednisolone on the cardiovascular system in complex "
        "treatment of newly detected pulmonary tuberculosis",
        models.OntologyTermRef("published",
                               "http://www.ebi.ac.uk/efo/EFO_0001796", "EFO"),
        (models.Comment("Subtitle", "Something"), ),
        expected_headers,
    )
    assert expected == investigation.publications[1]

    # Contacts
    assert 3 == len(investigation.contacts)
    # Both comment headers are expected after the standard contact keys
    expected_headers = [
        *investigation_headers.INVESTIGATION_CONTACTS_KEYS,
        "Comment[Investigation Person ORCID]",
        "Comment[Investigation Person REF]",
    ]
    # NOTE(review): the address literal below looks masked rather than like a
    # real fixture value -- confirm against the investigation file
    expected = models.ContactInfo(
        "Oliver",
        "Stephen",
        "G",
        "*****@*****.**",
        "",
        "",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef("corresponding author", "", ""),
        (
            models.Comment("Investigation Person ORCID", "12345"),
            models.Comment("Investigation Person REF", "personA"),
        ),
        expected_headers,
    )
    assert expected == investigation.contacts[0]
    expected = models.ContactInfo(
        "Juan",
        "Castrillo",
        "I",
        "",
        "123456789",
        "",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef("author", "", ""),
        (
            models.Comment("Investigation Person ORCID", "0987654321"),
            models.Comment("Investigation Person REF", "personB"),
        ),
        expected_headers,
    )
    assert expected == investigation.contacts[1]
    expected = models.ContactInfo(
        "Leo",
        "Zeef",
        "A",
        "",
        "",
        "+49 123456789",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef("author",
                               "http://purl.obolibrary.org/obo/RoleO_0000061",
                               "ROLEO"),
        (
            models.Comment("Investigation Person ORCID", "1357908642"),
            models.Comment("Investigation Person REF", "personC"),
        ),
        expected_headers,
    )
    assert expected == investigation.contacts[2]

    # Studies
    assert len(investigation.studies) == 2

    # Study 1
    study = investigation.studies[0]
    assert "BII-S-1" == study.info.identifier
    assert ("Study of the impact of changes in flux on the transcriptome, "
            "proteome, endometabolome and exometabolome of the yeast "
            "Saccharomyces cerevisiae under different nutrient limitations"
            ) == study.info.title
    assert Path("s_BII-S-1.txt") == study.info.path

    # Study 1 - Design descriptors
    assert 2 == len(study.designs)
    # "Comment[Test1]" is expected right after the first standard key,
    # "Comment[Test2]" after the last one
    expected_headers = [
        *investigation_headers.STUDY_DESIGN_DESCR_KEYS[0:1],
        "Comment[Test1]",
        *investigation_headers.STUDY_DESIGN_DESCR_KEYS[1:],
        "Comment[Test2]",
    ]
    expected = (
        models.DesignDescriptorsInfo(
            models.OntologyTermRef(
                "intervention design",
                "http://purl.obolibrary.org/obo/OBI_0000115", "OBI"),
            (models.Comment("Test1", "1"), models.Comment("Test2", "3")),
            expected_headers,
        ),
        models.DesignDescriptorsInfo(
            models.OntologyTermRef(
                "genotyping design",
                "http://purl.obolibrary.org/obo/OBI_0001444", "OBI"),
            (models.Comment("Test1", "2"), models.Comment("Test2", "4")),
            expected_headers,
        ),
    )
    assert expected == study.designs

    # Study 1 - Publications
    assert 1 == len(study.publications)
    # Same header layout as for the investigation publications, but built
    # from the study-level publication keys
    expected = models.PublicationInfo(
        "17439666",
        "doi:10.1186/jbiol54",
        "Castrillo JI, Zeef LA, Hoyle DC, Zhang N, Hayes A, Gardner DC, "
        "Cornell MJ, Petty J, Hakes L, Wardleworth L, Rash B, Brown M, "
        "Dunn WB, Broadhurst D, O'Donoghue K, Hester SS, Dunkley TP, Hart "
        "SR, Swainston N, Li P, Gaskell SJ, Paton NW, Lilley KS, Kell DB, "
        "Oliver SG.",
        "Growth control of the eukaryote cell: a systems biology study in "
        "yeast.",
        models.OntologyTermRef("published", "", ""),
        (models.Comment("Subtitle", "Something"), ),
        [
            *investigation_headers.STUDY_PUBLICATIONS_KEYS[0:4],
            "Comment[Subtitle]",
            *investigation_headers.STUDY_PUBLICATIONS_KEYS[4:],
        ],
    )
    assert expected == study.publications[0]

    # Study 1 - Factors (looked up by factor name)
    assert 2 == len(study.factors)
    expected_headers = [
        *investigation_headers.STUDY_FACTORS_KEYS, "Comment[FactorsTest]"
    ]
    expected = models.FactorInfo(
        "limiting nutrient",
        models.OntologyTermRef("chemical entity",
                               "http://purl.obolibrary.org/obo/CHEBI_24431",
                               "CHEBI"),
        (models.Comment("FactorsTest", "1"), ),
        expected_headers,
    )
    assert expected == study.factors["limiting nutrient"]
    expected = models.FactorInfo(
        "rate",
        models.OntologyTermRef("rate",
                               "http://purl.obolibrary.org/obo/PATO_0000161",
                               "PATO"),
        (models.Comment("FactorsTest", "2"), ),
        expected_headers,
    )
    assert expected == study.factors["rate"]

    # Study 1 - Assays (the first and the third of three are compared)
    assert 3 == len(study.assays)
    expected_headers = [
        *investigation_headers.STUDY_ASSAYS_KEYS, "Comment[Extra Info]"
    ]
    expected = models.AssayInfo(
        models.OntologyTermRef("protein expression profiling",
                               "http://purl.obolibrary.org/obo/OBI_0000615",
                               "OBI"),
        models.OntologyTermRef("mass spectrometry",
                               "http://purl.obolibrary.org/obo/OBI_0000470",
                               "OBI"),
        "iTRAQ",
        Path("a_proteome.txt"),
        (models.Comment("Extra Info", "a"), ),
        expected_headers,
    )
    assert expected == study.assays[0]
    expected = models.AssayInfo(
        models.OntologyTermRef("transcription profiling",
                               "http://purl.obolibrary.org/obo/OBI_0000424",
                               "OBI"),
        models.OntologyTermRef("DNA microarray",
                               "http://purl.obolibrary.org/obo/OBI_0400148",
                               "OBI"),
        "Affymetrix",
        Path("a_transcriptome.txt"),
        (models.Comment("Extra Info", "c"), ),
        expected_headers,
    )
    assert expected == study.assays[2]

    # Study 1 - Protocols (looked up by protocol name)
    assert 7 == len(study.protocols)
    # "Comment[Protocol Rating]" is expected between the seventh and eighth
    # standard protocol key
    expected_headers = [
        *investigation_headers.STUDY_PROTOCOLS_KEYS[0:7],
        "Comment[Protocol Rating]",
        *investigation_headers.STUDY_PROTOCOLS_KEYS[7:],
    ]
    expected = models.ProtocolInfo(
        "growth protocol",
        models.OntologyTermRef("growth", "", ""),
        "1. Biomass samples (45 ml) were taken via the sample port of the "
        "Applikon fermenters. The cells were pelleted by centrifugation for 5 "
        "min at 5000 rpm. The supernatant was removed and the RNA pellet "
        "resuspended in the residual medium to form a slurry. This was added "
        "in a dropwise manner directly into a 5 ml Teflon flask (B. Braun "
        "Biotech, Germany) containing liquid nitrogen and a 7 mm-diameter "
        "tungsten carbide ball. After allowing evaporation of the liquid "
        "nitrogen the flask was reassembled and the cells disrupted by "
        "agitation at 1500 rpm for 2 min in a Microdismembranator U (B. Braun "
        "Biotech, Germany) 2. The frozen powder was then dissolved in 1 ml of "
        "TriZol reagent (Sigma-Aldrich, UK), vortexed for 1 min, and then kept"
        " at room temperature for a further 5min. 3. Chloroform extraction was"
        " performed by addition of 0.2 ml chloroform, shaking vigorously or 15"
        " s, then 5min incubation at room temperature. 4. Following "
        "centrifugation at 12,000 rpm for 5 min, the RNA (contained in the "
        "aqueous phase) was precipitated with 0.5 vol of 2-propanol at room "
        "temperature for 15 min. 5. After further centrifugation (12,000 rpm "
        "for 10 min at 4 C) the RNA pellet was washed twice with 70 % (v/v) "
        "ethanol, briefly air-dried, and redissolved in 0.5 ml diethyl "
        "pyrocarbonate (DEPC)-treated water. 6. The single-stranded RNA was "
        "precipitated once more by addition of 0.5 ml of LiCl buffer (4 M "
        "LiCl, 20 mM Tris-HCl, pH 7.5, 10 mM EDTA), thus removing tRNA and "
        "DNA from the sample. 7. After precipitation (20 C for 1h) and "
        "centrifugation (12,000 rpm, 30 min, 4 C), the RNA was washed twice in"
        " 70 % (v/v) ethanol prior to being dissolved in a minimal volume of "
        "DEPC-treated water. 8. Total RNA quality was checked using the RNA "
        "6000 Nano Assay, and analysed on an Agilent 2100 Bioanalyser (Agilent"
        " Technologies). RNA was quantified using the Nanodrop ultra low "
        "volume spectrophotometer (Nanodrop Technologies).",
        "",
        "",
        {
            "rate":
            models.OntologyTermRef(
                "rate", "http://purl.obolibrary.org/obo/PATO_0000161", "PATO")
        },
        {},
        (models.Comment("Protocol Rating", "1"), ),
        expected_headers,
    )
    assert expected == study.protocols["growth protocol"]
    expected = models.ProtocolInfo(
        "metabolite extraction",
        models.OntologyTermRef("extraction",
                               "http://purl.obolibrary.org/obo/OBI_0302884",
                               "OBI"),
        "",
        "",
        "",
        {
            "standard volume": models.OntologyTermRef("standard volume", "",
                                                      ""),
            "sample volume": models.OntologyTermRef("sample volume", "", ""),
        },
        {
            "pipette":
            models.ProtocolComponentInfo(
                "pipette",
                models.OntologyTermRef("instrument",
                                       "http://www.ebi.ac.uk/efo/EFO_0000548",
                                       "EFO"),
            )
        },
        (models.Comment("Protocol Rating", "7"), ),
        expected_headers,
    )
    assert expected == study.protocols["metabolite extraction"]

    # Study 1 - Contacts
    assert 3 == len(study.contacts)
    expected_headers = [
        *investigation_headers.STUDY_CONTACTS_KEYS, "Comment[Study Person REF]"
    ]
    # NOTE(review): the address literal below looks masked rather than like a
    # real fixture value -- confirm against the investigation file
    expected = models.ContactInfo(
        "Oliver",
        "Stephen",
        "G",
        "*****@*****.**",
        "",
        "",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef("corresponding author", "", ""),
        (models.Comment("Study Person REF", ""), ),
        expected_headers,
    )
    assert expected == study.contacts[0]
    expected = models.ContactInfo(
        "Juan",
        "Castrillo",
        "I",
        "",
        "123456789",
        "",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef("author",
                               "http://purl.obolibrary.org/obo/RoleO_0000061",
                               "ROLEO"),
        (models.Comment("Study Person REF", ""), ),
        expected_headers,
    )
    assert expected == study.contacts[1]
    expected = models.ContactInfo(
        "Leo",
        "Zeef",
        "A",
        "",
        "",
        "+49 123456789",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef("author",
                               "http://purl.obolibrary.org/obo/RoleO_0000061",
                               "ROLEO"),
        (models.Comment("Study Person REF", ""), ),
        expected_headers,
    )
    assert expected == study.contacts[2]

    # Study 2 (here the complete basic info object is compared at once)
    study = investigation.studies[1]
    expected = models.BasicInfo(
        Path("s_BII-S-2.txt"),
        "BII-S-2",
        "A time course analysis of transcription response in yeast treated "
        "with rapamycin, a specific inhibitor of the TORC1 complex: impact "
        "on yeast growth",
        "Comprehensive high-throughput analyses at the levels of mRNAs, "
        "proteins, and metabolites, and studies on gene expression patterns "
        "are required for systems biology studies of cell growth [4,26-29]. "
        "Although such comprehensive data sets are lacking, many studies have "
        "pointed to a central role for the target-of-rapamycin (TOR) signal "
        "transduction pathway in growth control. TOR is a serine/threonine "
        "kinase that has been conserved from yeasts to mammals; it integrates "
        "signals from nutrients or growth factors to regulate cell growth and "
        "cell-cycle progression coordinately. Although such comprehensive data "
        "sets are lacking, many studies have pointed to a central role for the "
        "target-of-rapamycin (TOR) signal transduction pathway in growth "
        "control. TOR is a serine/threonine kinase that has been conserved "
        "from yeasts to mammals; it integrates signals from nutrients or "
        "growth factors to regulate cell growth and cell-cycle progression "
        "coordinately. The effect of rapamycin were studied as follows: a "
        "culture growing at mid-exponential phase was divided into two. "
        "Rapamycin (200 ng/ml) was added to one half, and the drug's solvent "
        "to the other, as the control. Samples were taken at 0, 1, 2 and 4 h "
        "after treatment. Gene expression at the mRNA level was investigated "
        "by transcriptome analysis using Affymetrix hybridization arrays.",
        date(2007, 4, 30),
        date(2009, 3, 10),
        (
            models.Comment("Study Grant Number", ""),
            models.Comment("Study Funding Agency", ""),
            models.Comment("Manuscript Licence", "CC BY 3.0"),
            models.Comment("Experimental Metadata Licence", "CC0"),
            models.Comment("Data Repository", ""),
            models.Comment("Data Record Accession", ""),
            models.Comment("Data Record URI", ""),
            models.Comment("Supplementary Information File Name", ""),
            models.Comment("Supplementary Information File Type", ""),
            models.Comment("Supplementary File URI", ""),
            models.Comment("Subject Keywords", ""),
        ),
        # Two comment headers are expected after the third standard study
        # info key, the remaining nine after the last one
        [
            *investigation_headers.STUDY_INFO_KEYS[0:3],
            "Comment[Study Grant Number]",
            "Comment[Study Funding Agency]",
            *investigation_headers.STUDY_INFO_KEYS[3:],
            "Comment[Manuscript Licence]",
            "Comment[Experimental Metadata Licence]",
            "Comment[Data Repository]",
            "Comment[Data Record Accession]",
            "Comment[Data Record URI]",
            "Comment[Supplementary Information File Name]",
            "Comment[Supplementary Information File Type]",
            "Comment[Supplementary File URI]",
            "Comment[Subject Keywords]",
        ],
    )
    assert expected == study.info

    # Study 2 - Factors (no comments, so only the standard headers)
    assert 3 == len(study.factors)
    expected = models.FactorInfo(
        "exposure time",
        models.OntologyTermRef("time",
                               "http://purl.obolibrary.org/obo/PATO_0000165",
                               "OBI_BCGO"),
        (),
        [*investigation_headers.STUDY_FACTORS_KEYS],
    )
    assert expected == study.factors["exposure time"]

    # Study 2 - Assays
    assert 1 == len(study.assays)
    expected = models.AssayInfo(
        models.OntologyTermRef("transcription profiling",
                               "http://purl.obolibrary.org/obo/OBI_0000424",
                               "OBI"),
        models.OntologyTermRef("DNA microarray",
                               "http://purl.obolibrary.org/obo/OBI_0400148",
                               "OBI"),
        "Affymetrix",
        Path("a_microarray.txt"),
        (),
        [*investigation_headers.STUDY_ASSAYS_KEYS],
    )
    assert expected == study.assays[0]

    # Study 2 - Protocols
    assert 10 == len(study.protocols)
    # The "NMR tubes" component is expected with an all-``None`` term
    # reference, in contrast to the fully annotated "Bruker-Av600" component
    expected = models.ProtocolInfo(
        "NMR spectroscopy",
        models.OntologyTermRef("NMR spectroscopy",
                               "http://purl.obolibrary.org/obo/OBI_0000623",
                               "OBI"),
        "",
        "",
        "",
        {},
        {
            "NMR tubes":
            models.ProtocolComponentInfo(
                "NMR tubes", models.OntologyTermRef(None, None, None)),
            "Bruker-Av600":
            models.ProtocolComponentInfo(
                "Bruker-Av600",
                models.OntologyTermRef("instrument",
                                       "http://www.ebi.ac.uk/efo/EFO_0000548",
                                       "EFO"),
            ),
        },
        (),
        [*investigation_headers.STUDY_PROTOCOLS_KEYS],
    )
    assert expected == study.protocols["NMR spectroscopy"]

    # Study 2 - Contacts (only the second of three is compared)
    assert 3 == len(study.contacts)
    expected = models.ContactInfo(
        "Juan",
        "Castrillo",
        "I",
        "",
        "123456789",
        "",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef("author",
                               "http://purl.obolibrary.org/obo/RoleO_0000061",
                               "ROLEO"),
        (models.Comment("Study Person REF", "personB"), ),
        [
            *investigation_headers.STUDY_CONTACTS_KEYS,
            "Comment[Study Person REF]"
        ],
    )
    assert expected == study.contacts[1]
# Example no. 22
# 0
def test_study_reader_minimal_study(minimal_investigation_file,
                                    minimal_study_file):
    """Read the minimal study file with ``StudyReader`` and check its content.

    Unlike the ``StudyRowReader``, which only provides the row-wise nodes,
    the ``StudyReader`` returns a ``Study`` object; its materials, its
    process and its arcs are compared with hand-written expectations.
    """
    # Load investigation (tested elsewhere); only validation is recorded
    parsed_investigation = InvestigationReader.from_stream(
        minimal_investigation_file).read()
    with pytest.warns(IsaWarning) as validation_warnings:
        InvestigationValidator(parsed_investigation).validate()
    assert 2 == len(validation_warnings)

    # Create the study reader and check the header it exposes before reading
    study_reader = StudyReader.from_stream("S1", minimal_study_file)
    assert 3 == len(study_reader.header)

    # Read the study and validate it against the first study of the
    # investigation
    study = study_reader.read()
    StudyValidator(parsed_investigation, parsed_investigation.studies[0],
                   study).validate()

    # File location and overall sizes
    expected_suffix = os.path.normpath("data/i_minimal/s_minimal.txt")
    assert os.path.normpath(str(study.file)).endswith(expected_suffix)
    assert 3 == len(study.header)
    assert 2 == len(study.materials)
    assert 1 == len(study.processes)
    assert 2 == len(study.arcs)

    # Materials: each row is (type, key in ``study.materials``, name, header)
    material_rows = (
        ("Source Name", "S1-source-0815", "0815", table_headers.SOURCE_NAME),
        ("Sample Name", "S1-sample-0815-N1", "0815-N1",
         table_headers.SAMPLE_NAME),
    )
    for kind, key, label, header in material_rows:
        expected_material = models.Material(kind, key, label, None, (), (),
                                            (), None, [header])
        assert expected_material == study.materials[key]

    # The single process connecting source and sample
    collection_key = "S1-sample collection-2-1"
    expected_process = models.Process(
        "sample collection",
        collection_key,
        None,
        None,
        None,
        None,
        (),
        (),
        None,
        None,
        None,
        [table_headers.PROTOCOL_REF],
    )
    assert expected_process == study.processes[collection_key]

    # Arcs are the consecutive node pairs along source -> process -> sample
    chain = ("S1-source-0815", collection_key, "S1-sample-0815-N1")
    expected_arcs = tuple(
        models.Arc(tail, head) for tail, head in zip(chain, chain[1:]))
    assert expected_arcs == study.arcs
# Example no. 23
# 0
def test_study_reader_small_study(small_investigation_file, small_study_file):
    """Parse the small study with ``StudyReader`` and verify the result.

    The investigation is loaded first because ``StudyValidator`` takes it as
    context.  Afterwards the test checks the header length, the number of
    materials, processes and arcs, a selection of source and sample
    materials, a selection of ``sample collection`` processes and the
    complete, ordered tuple of arcs.
    """
    # Reading and validating the investigation is expected to record exactly
    # two warnings (investigation parsing itself is tested elsewhere).
    with pytest.warns(IsaWarning) as record:
        investigation = InvestigationReader.from_stream(
            small_investigation_file).read()
        InvestigationValidator(investigation).validate()
    assert 2 == len(record)

    # The header can be inspected on the reader before the body is read.
    reader = StudyReader.from_stream("S1", small_study_file)
    assert 13 == len(reader.header)

    study = reader.read()
    StudyValidator(investigation, investigation.studies[0], study).validate()

    # Overall shape of the parsed study
    assert os.path.normpath(str(study.file)).endswith(
        os.path.normpath("data/i_small/s_small.txt"))
    assert 13 == len(study.header)
    assert 9 == len(study.materials)
    assert 5 == len(study.processes)
    assert 10 == len(study.arcs)

    # Column headers expected on each kind of node
    term_columns = [
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
    ]
    source_headers = [
        table_headers.SOURCE_NAME,
        table_headers.CHARACTERISTICS + "[organism]",
        *term_columns,
        table_headers.CHARACTERISTICS + "[age]",
        table_headers.UNIT,
        *term_columns,
    ]
    collection_headers = [
        table_headers.PROTOCOL_REF,
        table_headers.PARAMETER_VALUE + "[instrument]",
        table_headers.PERFORMER,
        table_headers.DATE,
    ]
    sample_headers = [
        table_headers.SAMPLE_NAME,
        table_headers.CHARACTERISTICS + "[status]",
        table_headers.FACTOR_VALUE + "[treatment]",
    ]

    # Unit shared by every "age" characteristic
    day = models.OntologyTermRef(
        name="day",
        accession="http://purl.obolibrary.org/obo/UO_0000033",
        ontology_name="UO")

    def source(unique_name, name, organism, age):
        """Build the expected source material for one source row."""
        traits = (
            models.Characteristics(
                name="organism", value=[organism], unit=None),
            models.Characteristics(name="age", value=[age], unit=day),
        )
        return models.Material("Source Name", unique_name, name, None,
                               traits, (), (), None, source_headers)

    def sample(unique_name, name, status, treatment):
        """Build the expected sample material for one sample row."""
        return models.Material(
            "Sample Name",
            unique_name,
            name,
            None,
            (models.Characteristics("status", [status], None), ),
            (),
            (models.FactorValue("treatment", treatment, None), ),
            None,
            sample_headers,
        )

    def collection(unique_name, instruments):
        """Build the expected ``sample collection`` process."""
        return models.Process(
            "sample collection",
            unique_name,
            None,
            None,
            date(2018, 2, 2),
            "John Doe",
            (models.ParameterValue("instrument", instruments, None), ),
            (),
            None,
            None,
            None,
            collection_headers,
        )

    # Materials: three sources followed by four samples; the last sample has
    # an empty name and a generated unique name.
    expected_materials = (
        source(
            "S1-source-0815",
            "0815",
            models.OntologyTermRef(
                name="Mus musculus",
                accession="http://purl.bioontology.org/ontology/"
                "NCBITAXON/10090",
                ontology_name="NCBITAXON",
            ),
            "90",
        ),
        source("S1-source-0816", "0816",
               models.OntologyTermRef("Mus musculus", "", ""), ""),
        source("S1-source-0817", "0817",
               models.OntologyTermRef(None, None, None), "150"),
        sample("S1-sample-0815-N1", "0815-N1", "0", "yes"),
        sample("S1-sample-0815-T1", "0815-T1", "2", ""),
        sample("S1-sample-0816-T1", "0816-T1", "1", "yes"),
        sample("S1-Empty Sample Name-13-5", "", "", ""),
    )
    for expected in expected_materials:
        assert expected == study.materials[expected.unique_name]

    # Processes: the second one lists two instruments
    expected_processes = (
        collection("S1-sample collection-9-2", ["scalpel"]),
        collection("S1-sample collection-9-3",
                   ["scalpel type A", "scalpel type B"]),
        collection("S1-sample collection-9-4", ["scalpel"]),
    )
    for expected in expected_processes:
        assert expected == study.processes[expected.unique_name]

    # Arcs: every row contributes source -> process and process -> sample,
    # in row order.
    chains = (
        ("S1-source-0814", "S1-sample collection-9-1", "S1-sample-0814-N1"),
        ("S1-source-0815", "S1-sample collection-9-2", "S1-sample-0815-N1"),
        ("S1-source-0815", "S1-sample collection-9-3", "S1-sample-0815-T1"),
        ("S1-source-0816", "S1-sample collection-9-4", "S1-sample-0816-T1"),
        ("S1-source-0817", "S1-sample collection-9-5",
         "S1-Empty Sample Name-13-5"),
    )
    expected_arcs = tuple(
        models.Arc(tail, head)
        for source_node, process_node, sample_node in chains
        for tail, head in ((source_node, process_node),
                           (process_node, sample_node)))
    assert expected_arcs == study.arcs
Exemplo n.º 24
0
def test_parse_small_investigation(small_investigation_file):
    """Parse the small investigation file and spot-check the parsed model.

    Checks the number of warnings recorded during validation, three of the
    four ontology source references, the basic investigation info and the
    single study with its single assay and single contact.
    """
    # Only validation runs inside the ``pytest.warns`` block, so only
    # warnings emitted while validating are counted.
    investigation = InvestigationReader.from_stream(
        small_investigation_file).read()
    with pytest.warns(IsaWarning) as record:
        InvestigationValidator(investigation).validate()
    assert 2 == len(record)

    # Investigation
    assert investigation

    # Ontology sources: (name, file, version, description)
    ontology_refs = investigation.ontology_source_refs
    assert 4 == len(ontology_refs)
    described_sources = (
        (
            "OBI",
            "http://data.bioontology.org/ontologies/OBI",
            "31",
            "Ontology for Biomedical Investigations",
        ),
        (
            "NCBITAXON",
            "http://data.bioontology.org/ontologies/NCBITAXON",
            "8",
            ("National Center for Biotechnology Information (NCBI) Organismal "
             "Classification"),
        ),
        (
            "ROLEO",
            "http://data.bioontology.org/ontologies/ROLEO",
            "1",
            "Role Ontology",
        ),
    )
    for name, url, version, description in described_sources:
        expected = models.OntologyRef(
            name,
            url,
            version,
            description,
            (),
            [*investigation_headers.ONTOLOGY_SOURCE_REF_KEYS],
        )
        assert expected == ontology_refs[name]

    # Basic info
    info = investigation.info
    assert "Small Investigation" == info.title
    assert "i_small" == info.identifier

    # Studies
    assert len(investigation.studies) == 1
    study = investigation.studies[0]
    assert "s_small" == study.info.identifier
    assert "Small Germline Study" == study.info.title
    assert Path("s_small.txt") == study.info.path

    # Assays
    assert len(study.assays) == 1
    assert Path("a_small.txt") == study.assays[0].path

    # Study contacts
    assert 1 == len(study.contacts)
Exemplo n.º 25
0
def test_parse_comment_investigation(comment_investigation_file):
    """Parse an investigation that carries ``Comment[...]`` rows everywhere.

    For every section that is checked, the test compares both the parsed
    comment values and the recorded ``headers`` list, i.e. the position at
    which each ``Comment[...]`` row appeared among the standard keys of
    that section.
    """
    # Read Investigation from file-like object
    reader = InvestigationReader.from_stream(comment_investigation_file)
    investigation = reader.read()
    InvestigationValidator(investigation).validate()

    # Check results
    # Investigation
    assert investigation

    # Ontology sources
    assert 9 == len(investigation.ontology_source_refs)
    expected = models.OntologyRef(
        "OBI",
        "http://data.bioontology.org/ontologies/OBI",
        "21",
        "Ontology for Biomedical Investigations",
        (models.Comment("OntologyComment", "TestValue01"), ),
        [
            # The comment row sits between the second and third standard key
            *investigation_headers.ONTOLOGY_SOURCE_REF_KEYS[0:2],
            "Comment[OntologyComment]",
            *investigation_headers.ONTOLOGY_SOURCE_REF_KEYS[2:],
        ],
    )
    assert expected == investigation.ontology_source_refs["OBI"]

    # Basic info
    assert "BII-I-1" == investigation.info.identifier
    assert "Owning Organisation URI" == investigation.info.comments[2].name
    assert "TestValue01" == investigation.info.comments[2].value

    # Investigation info headers: all standard keys followed by the comments.
    # Compared against the parsed headers in the same way as the study info
    # headers further below.
    expected_headers = [
        *investigation_headers.INVESTIGATION_INFO_KEYS,
        "Comment[Created With Configuration]",
        "Comment[Last Opened With Configuration]",
        "Comment[Owning Organisation URI]",
        "Comment[Consortium URI]",
        "Comment[Principal Investigator URI]",
        "Comment[Investigation Keywords]",
    ]
    assert expected_headers == investigation.info.headers

    # Publications
    assert 3 == len(investigation.publications)
    expected = models.PublicationInfo(
        "17439666",
        "doi:10.1186/jbiol54",
        "Castrillo JI, Zeef LA, Hoyle DC, Zhang N, Hayes A, Gardner DC, "
        "Cornell MJ, Petty J, Hakes L, Wardleworth L, Rash B, Brown M, "
        "Dunn WB, Broadhurst D, O'Donoghue K, Hester SS, Dunkley TP, Hart "
        "SR, Swainston N, Li P, Gaskell SJ, Paton NW, Lilley KS, Kell DB, "
        "Oliver SG.",
        "Growth control of the eukaryote cell: a systems biology study in "
        "yeast.",
        models.OntologyTermRef("indexed in Pubmed", "", ""),
        (models.Comment("InvestPubsComment", "TestValue01"), ),
        [
            *investigation_headers.INVESTIGATION_PUBLICATIONS_KEYS[0:2],
            "Comment[InvestPubsComment]",
            *investigation_headers.INVESTIGATION_PUBLICATIONS_KEYS[2:],
        ],
    )
    assert expected == investigation.publications[0]

    # Contacts
    assert 3 == len(investigation.contacts)
    expected = models.ContactInfo(
        "Leo",
        "Zeef",
        "A",
        "",
        "",
        "+49 123456789",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef("author",
                               "http://purl.obolibrary.org/obo/RoleO_0000061",
                               "ROLEO"),
        (
            models.Comment("Investigation Person ORCID", "1357908642"),
            models.Comment("Investigation Person REF", "personC"),
        ),
        [
            *investigation_headers.INVESTIGATION_CONTACTS_KEYS,
            "Comment[Investigation Person ORCID]",
            "Comment[Investigation Person REF]",
        ],
    )
    assert expected == investigation.contacts[2]

    # Studies
    assert len(investigation.studies) == 2

    # Both studies are expected to record the same study info headers: two
    # comments after the third standard key, the rest after the last key.
    expected_study_headers = [
        *investigation_headers.STUDY_INFO_KEYS[0:3],
        "Comment[Study Grant Number]",
        "Comment[Study Funding Agency]",
        *investigation_headers.STUDY_INFO_KEYS[3:],
        "Comment[Manuscript Licence]",
        "Comment[Experimental Metadata Licence]",
        "Comment[Data Repository]",
        "Comment[Data Record Accession]",
        "Comment[Data Record URI]",
        "Comment[Supplementary Information File Name]",
        "Comment[Supplementary Information File Type]",
        "Comment[Supplementary File URI]",
        "Comment[Subject Keywords]",
    ]

    # Study 1
    study = investigation.studies[0]
    assert "BII-S-1" == study.info.identifier
    assert Path("s_BII-S-1.txt") == study.info.path
    assert "Manuscript Licence" == study.info.comments[2].name
    assert "CC BY 3.0" == study.info.comments[2].value
    assert expected_study_headers == study.info.headers

    # Study 1 - Design descriptors
    assert 2 == len(study.designs)
    expected = models.DesignDescriptorsInfo(
        models.OntologyTermRef("genotyping design",
                               "http://purl.obolibrary.org/obo/OBI_0001444",
                               "OBI"),
        (models.Comment("DesignDescsComment", "TestValue01"), ),
        [
            *investigation_headers.STUDY_DESIGN_DESCR_KEYS,
            "Comment[DesignDescsComment]"
        ],
    )
    assert expected == study.designs[1]

    # Study 1 - Publications
    assert 1 == len(study.publications)
    expected = models.PublicationInfo(
        "17439666",
        "doi:10.1186/jbiol54",
        "Castrillo JI, Zeef LA, Hoyle DC, Zhang N, Hayes A, Gardner DC, "
        "Cornell MJ, Petty J, Hakes L, Wardleworth L, Rash B, Brown M, "
        "Dunn WB, Broadhurst D, O'Donoghue K, Hester SS, Dunkley TP, Hart "
        "SR, Swainston N, Li P, Gaskell SJ, Paton NW, Lilley KS, Kell DB, "
        "Oliver SG.",
        "Growth control of the eukaryote cell: a systems biology study in "
        "yeast.",
        models.OntologyTermRef("published", "", ""),
        (models.Comment("StudyPubsComment", "TestValue01"), ),
        [
            *investigation_headers.STUDY_PUBLICATIONS_KEYS[0:4],
            "Comment[StudyPubsComment]",
            *investigation_headers.STUDY_PUBLICATIONS_KEYS[4:],
        ],
    )
    assert expected == study.publications[0]

    # Study 1 - Factors
    assert 2 == len(study.factors)
    expected = models.FactorInfo(
        "rate",
        models.OntologyTermRef("rate",
                               "http://purl.obolibrary.org/obo/PATO_0000161",
                               "PATO"),
        (models.Comment("FactorsComment", "TestValue01"), ),
        [*investigation_headers.STUDY_FACTORS_KEYS, "Comment[FactorsComment]"],
    )
    assert expected == study.factors["rate"]

    # Study 1 - Assays
    assert 3 == len(study.assays)
    expected = models.AssayInfo(
        models.OntologyTermRef("transcription profiling",
                               "http://purl.obolibrary.org/obo/OBI_0000424",
                               "OBI"),
        models.OntologyTermRef("DNA microarray",
                               "http://purl.obolibrary.org/obo/OBI_0400148",
                               "OBI"),
        "Affymetrix",
        Path("a_transcriptome.txt"),
        (models.Comment("AssaysComment",
                        "A comment within ontology terms?"), ),
        [
            *investigation_headers.STUDY_ASSAYS_KEYS[0:5],
            "Comment[AssaysComment]",
            *investigation_headers.STUDY_ASSAYS_KEYS[5:],
        ],
    )
    assert expected == study.assays[2]

    # Study 1 - Protocols
    assert 7 == len(study.protocols)
    expected = models.ProtocolInfo(
        "metabolite extraction",
        models.OntologyTermRef("extraction",
                               "http://purl.obolibrary.org/obo/OBI_0302884",
                               "OBI"),
        "",
        "",
        "",
        {
            "standard volume": models.OntologyTermRef("standard volume", "",
                                                      ""),
            "sample volume": models.OntologyTermRef("sample volume", "", ""),
        },
        {
            "pipette":
            models.ProtocolComponentInfo(
                "pipette",
                models.OntologyTermRef("instrument",
                                       "http://www.ebi.ac.uk/efo/EFO_0000548",
                                       "EFO"),
            )
        },
        (models.Comment("ProtocolsComment", "TestValue01"), ),
        [
            *investigation_headers.STUDY_PROTOCOLS_KEYS[0:7],
            "Comment[ProtocolsComment]",
            *investigation_headers.STUDY_PROTOCOLS_KEYS[7:],
        ],
    )
    assert expected == study.protocols["metabolite extraction"]

    # Study 1 - Contacts
    assert 3 == len(study.contacts)
    expected = models.ContactInfo(
        "Juan",
        "Castrillo",
        "I",
        "",
        "123456789",
        "",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef("author",
                               "http://purl.obolibrary.org/obo/RoleO_0000061",
                               "ROLEO"),
        # In study 1 the person REF comment is present but empty
        (models.Comment("Study Person REF", ""), ),
        [
            *investigation_headers.STUDY_CONTACTS_KEYS,
            "Comment[Study Person REF]"
        ],
    )
    assert expected == study.contacts[1]

    # Study 2
    study = investigation.studies[1]
    assert "BII-S-2" == study.info.identifier
    assert Path("s_BII-S-2.txt") == study.info.path
    assert "Study Grant Number" == study.info.comments[0].name
    assert "" == study.info.comments[0].value
    assert "Manuscript Licence" == study.info.comments[2].name
    assert "CC BY 3.0" == study.info.comments[2].value
    assert expected_study_headers == study.info.headers

    # Study 2 - Contacts
    assert 3 == len(study.contacts)
    expected = models.ContactInfo(
        "Juan",
        "Castrillo",
        "I",
        "",
        "123456789",
        "",
        "Oxford Road, Manchester M13 9PT, UK",
        "Faculty of Life Sciences, Michael Smith Building, "
        "University of Manchester",
        models.OntologyTermRef("author",
                               "http://purl.obolibrary.org/obo/RoleO_0000061",
                               "ROLEO"),
        # Same contact as in study 1, but here the person REF has a value
        (models.Comment("Study Person REF", "personB"), ),
        [
            *investigation_headers.STUDY_CONTACTS_KEYS,
            "Comment[Study Person REF]"
        ],
    )
    assert expected == study.contacts[1]
Exemplo n.º 26
0
def create_and_write(out_path):
    """Create an investigation with a study and assay and write to ``out_path``.

    A minimal ISA model is built in memory and written as three ISA-Tab text
    files inside ``out_path``:

    - ``i_minimal.txt``: the investigation with one study, one assay, two
      protocols and one ontology source reference,
    - ``s_minimal.txt``: the study graph (source -> sample collection ->
      sample),
    - ``a_minimal.txt``: the assay graph (sample -> nucleic acid sequencing
      -> two raw data files).

    Each part is validated with its validator right before it is written.

    ``out_path`` is joined with the file names and passed to ``open`` in
    write mode; ``open`` does not create missing directories, so the
    directory has to exist already.
    """

    # Prepare one or more study sections
    # Prepare basic study information
    study_info = models.BasicInfo(
        path="s_minimal.txt",
        identifier="s_minimal",
        title="Germline Study",
        description=None,
        submission_date=None,
        public_release_date=None,
        comments=(
            models.Comment(name="Study Grant Number", value=None),
            models.Comment(name="Study Funding Agency", value=None),
        ),
        headers=[],
    )

    # Create one or more assays
    assay_01 = models.AssayInfo(
        measurement_type=models.OntologyTermRef(
            name="exome sequencing assay",
            accession="http://purl.obolibrary.org/obo/OBI_0002118",
            ontology_name="OBI",
        ),
        technology_type=models.OntologyTermRef(
            name="nucleotide sequencing",
            accession="http://purl.obolibrary.org/obo/OBI_0000626",
            ontology_name="OBI",
        ),
        platform=None,
        path="a_minimal.txt",
        comments=(),
        headers=[],
    )

    # Prepare one or more protocols
    # The protocol names are referenced again below as ``protocol_ref`` of
    # the study and assay processes.
    protocol_01 = models.ProtocolInfo(
        name="sample collection",
        type=models.OntologyTermRef(name="sample collection"),
        description=None,
        uri=None,
        version=None,
        parameters={},
        components={},
        comments=(),
        headers=[],
    )
    protocol_02 = models.ProtocolInfo(
        name="nucleic acid sequencing",
        type=models.OntologyTermRef(name="nucleic acid sequencing"),
        description=None,
        uri=None,
        version=None,
        parameters={},
        components={},
        comments=(),
        headers=[],
    )

    # Create study object
    study_01 = models.StudyInfo(
        info=study_info,
        designs=(),
        publications=(),
        factors={},
        assays=(assay_01, ),
        protocols={
            protocol_01.name: protocol_01,
            protocol_02.name: protocol_02
        },
        contacts=(),
    )

    # Prepare other investigation section
    # Prepare one or more ontology term source references
    # ("OBI" is the ontology name used by the assay's term references above)
    onto_ref_01 = models.OntologyRef(
        name="OBI",
        file="http://data.bioontology.org/ontologies/OBI",
        version="31",
        description="Ontology for Biomedical Investigations",
        comments=(),
        headers=[],
    )

    # Prepare basic investigation information
    invest_info = models.BasicInfo(
        path="i_minimal.txt",
        identifier="i_minimal",
        title="Minimal Investigation",
        description=None,
        submission_date=None,
        public_release_date=None,
        comments=(),
        headers=[],
    )

    # Create investigation object
    investigation = models.InvestigationInfo(
        ontology_source_refs={onto_ref_01.name: onto_ref_01},
        info=invest_info,
        publications=(),
        contacts=(),
        studies=(study_01, ),
    )

    # Validate investigation
    InvestigationValidator(investigation).validate()

    # Write the investigation as ISA-Tab txt file
    # (``newline=""`` switches off newline translation on write)
    with open(join(out_path, investigation.info.path), "wt",
              newline="") as outputf:
        InvestigationWriter.from_stream(investigation=investigation,
                                        output_file=outputf).write()

    # Create a corresponding Study graph

    # Create at least one source, one sample and one collection process
    # Unique names are required for unambiguous node identification
    source_01 = models.Material(
        type="Source Name",
        unique_name="S1-source-0815",
        name="0815",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.SOURCE_NAME],
    )

    sample_01 = models.Material(
        type="Sample Name",
        unique_name="S1-sample-0815-N1",
        name="0815-N1",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.SAMPLE_NAME],
    )

    process_01 = models.Process(
        protocol_ref="sample collection",
        unique_name="S1-sample collection-2-1",
        name=None,
        name_type=None,
        date=None,
        performer=None,
        parameter_values=(),
        comments=(),
        array_design_ref=None,
        first_dimension=None,
        second_dimension=None,
        headers=[table_headers.PROTOCOL_REF],
    )

    # Create the arcs to connect the material and process nodes, referenced by the unique name
    arc_01 = models.Arc(tail="S1-source-0815", head="S1-sample collection-2-1")
    arc_02 = models.Arc(tail="S1-sample collection-2-1",
                        head="S1-sample-0815-N1")

    # Create the study graph object
    # The file path is taken from the study section of the investigation.
    study_graph_01 = models.Study(
        file=investigation.studies[0].info.path,
        header=None,
        materials={
            source_01.unique_name: source_01,
            sample_01.unique_name: sample_01
        },
        processes={process_01.unique_name: process_01},
        arcs=(arc_01, arc_02),
    )

    # Validate study graph
    StudyValidator(investigation=investigation,
                   study_info=investigation.studies[0],
                   study=study_graph_01).validate()

    # Write the study as ISA-Tab txt file
    with open(join(out_path, investigation.studies[0].info.path),
              "wt",
              newline="") as outputf:
        StudyWriter.from_stream(study_or_assay=study_graph_01,
                                output_file=outputf).write()

    # Create a corresponding Assay graph

    # Create at least one sample, one output material and one collection process
    # Unique names are required for unambiguous node identification
    # Explicit header definition per node is currently required to enable export to ISA-Tab
    # NOTE: ``sample_01`` and ``process_01`` are rebound here; the sample is
    # defined with the same values as in the study graph above.
    sample_01 = models.Material(
        type="Sample Name",
        unique_name="S1-sample-0815-N1",
        name="0815-N1",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.SAMPLE_NAME],
    )

    data_file_01 = models.Material(
        type="Raw Data File",
        unique_name="S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
        name="0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.RAW_DATA_FILE],
    )

    data_file_02 = models.Material(
        type="Raw Data File",
        unique_name="S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
        name="0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
        extract_label=None,
        characteristics=(),
        comments=(),
        factor_values=(),
        material_type=None,
        headers=[table_headers.RAW_DATA_FILE],
    )

    process_01 = models.Process(
        protocol_ref="nucleic acid sequencing",
        unique_name="S1-A1-0815-N1-DNA1-WES1-3",
        name="0815-N1-DNA1-WES1",
        name_type="Assay Name",
        date=None,
        performer=None,
        parameter_values=(),
        comments=(),
        array_design_ref=None,
        first_dimension=None,
        second_dimension=None,
        headers=[table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )

    # Create the arcs to connect the material and process nodes, referenced by the unique name
    # The second raw data file is chained after the first one instead of
    # being attached to the process directly.
    arcs = (
        models.Arc(tail="S1-sample-0815-N1", head="S1-A1-0815-N1-DNA1-WES1-3"),
        models.Arc(
            tail="S1-A1-0815-N1-DNA1-WES1-3",
            head="S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
        ),
        models.Arc(
            tail="S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
            head="S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
        ),
    )

    # Create the assay graph object
    # The file path is taken from the assay section of the investigation.
    assay_graph_01 = models.Assay(
        file=investigation.studies[0].assays[0].path,
        header=None,
        materials={
            sample_01.unique_name: sample_01,
            data_file_01.unique_name: data_file_01,
            data_file_02.unique_name: data_file_02,
        },
        processes={process_01.unique_name: process_01},
        arcs=arcs,
    )

    # Validate assay graph
    AssayValidator(
        investigation=investigation,
        study_info=investigation.studies[0],
        assay_info=investigation.studies[0].assays[0],
        assay=assay_graph_01,
    ).validate()

    # Write the assay as ISA-Tab txt file
    with open(join(out_path, investigation.studies[0].assays[0].path),
              "wt",
              newline="") as outputf:
        AssayWriter.from_stream(study_or_assay=assay_graph_01,
                                output_file=outputf).write()