Example #1
def test_header_exception_labeled_header_not_allowed(
        assay_file_exception_labeled_header_not_allowed):
    with pytest.raises(ParseIsatabException) as excinfo:
        AssayReader.from_stream(
            "S1", "A1", assay_file_exception_labeled_header_not_allowed)
    msg = 'Header "Factor Value" not allowed in assay.'
    assert msg == str(excinfo.value)
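
The snippets on this page omit their imports. Below is a sketch of the imports they presumably rely on; the exact module paths are an assumption about the altamisa package layout and may differ between versions.

import io
import os

import pytest

# Presumed origins of the names used in these examples (an assumption; adjust
# to the installed altamisa version):
from altamisa.constants import table_headers
from altamisa.exceptions import IsaWarning, ParseIsatabException
from altamisa.isatab import (
    AssayReader,
    AssayValidator,
    AssayWriter,
    InvestigationReader,
    InvestigationValidator,
    StudyReader,
    StudyValidator,
    models,
)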
Example #2
def test_header_exception_term_source_ref_stop_iteration(
        assay_file_exception_term_source_ref_stop_iteration):
    with pytest.raises(ParseIsatabException) as excinfo:
        AssayReader.from_stream(
            "S1", "A1", assay_file_exception_term_source_ref_stop_iteration)
    msg = 'Expected one more column on seeing "Term Source REF"'
    assert msg == str(excinfo.value)
Example #3
def test_header_exception_term_source_ref_next_column(
        assay_file_exception_term_source_ref_next_column):
    with pytest.raises(ParseIsatabException) as excinfo:
        AssayReader.from_stream(
            "S1", "A1", assay_file_exception_term_source_ref_next_column)
    msg = 'Expected column "Term Accession Number" after seeing "Term Source REF"'
    assert msg == str(excinfo.value)
Example #4
def test_header_exception_labeled_header_format(
        assay_file_exception_labeled_header_format):
    with pytest.raises(ParseIsatabException) as excinfo:
        AssayReader.from_stream("S1", "A1",
                                assay_file_exception_labeled_header_format)
    msg = "Problem parsing labeled header CharacteristicsWithoutBrackets"
    assert msg == str(excinfo.value)
Example #5
def test_header_exception_duplicated_header(
        assay_file_exception_duplicated_header):
    with pytest.raises(ParseIsatabException) as excinfo:
        AssayReader.from_stream("S1", "A1",
                                assay_file_exception_duplicated_header).read()
    msg = "Found duplicated column types in header of study S1 assay A1: Characteristics[Organism]"
    assert msg == str(excinfo.value)
Example #6
def test_parsing_exception_invalid_column_type(assay_file_exception_invalid_column_type):
    with pytest.raises(ParseIsatabException) as excinfo:
        AssayReader.from_stream("S1", "A1", assay_file_exception_invalid_column_type).read()
    msg = (
        "Invalid column type occured \"Parameter Value\" not in ('Material Type', "
        "'Characteristics', 'Comment', 'Factor Value', 'Label', 'Term Source REF', 'Unit')"
    )
    assert msg == str(excinfo.value)
Example #7
    def get(self, project_uuid=None) -> IsaData:
        raw_data = self.get_raw(project_uuid)
        investigation = InvestigationReader.from_stream(
            input_file=io.StringIO(raw_data["investigation"]["tsv"]),
            filename=raw_data["investigation"]["path"],
        ).read()
        studies = {
            path: StudyReader.from_stream(
                study_id=path,
                input_file=io.StringIO(details["tsv"]),
                filename=path,
            ).read()
            for path, details in raw_data["studies"].items()
        }
        if len(studies) > 1:  # pragma: nocover
            raise UnsupportedIsaTabFeatureException("More than one study found!")
        study = list(studies.values())[0]
        assays = {
            path: AssayReader.from_stream(
                study_id=study.file,
                assay_id=path,
                input_file=io.StringIO(details["tsv"]),
                filename=path,
            ).read()
            for path, details in raw_data["assays"].items()
        }
        return IsaData(investigation, raw_data["investigation"]["path"], studies, assays)
Example #8
def run_warnings_caught(args):
    # Read investigation
    investigation = InvestigationReader.from_stream(args.input_investigation_file).read()
    args.input_investigation_file.close()

    # Validate investigation
    InvestigationValidator(investigation).validate()

    # Read studies and assays
    path_in = os.path.normpath(os.path.dirname(args.input_investigation_file.name))
    studies = {}
    assays = {}
    for s, study_info in enumerate(investigation.studies):
        if study_info.info.path:
            with open(os.path.join(path_in, study_info.info.path), "rt") as inputf:
                studies[s] = StudyReader.from_stream("S{}".format(s + 1), inputf).read()
        if study_info.assays:
            assays[s] = {}
        for a, assay_info in enumerate(study_info.assays):
            if assay_info.path:
                with open(os.path.join(path_in, assay_info.path), "rt") as inputf:
                    assays[s][a] = AssayReader.from_stream(
                        "S{}".format(s + 1), "A{}".format(a + 1), inputf
                    ).read()

    # Validate studies and assays
    for s, study_info in enumerate(investigation.studies):
        if study_info.info.path:
            StudyValidator(investigation, study_info, studies[s]).validate()
        for a, assay_info in enumerate(study_info.assays):
            if assay_info.path:
                AssayValidator(investigation, study_info, assay_info, assays[s][a]).validate()
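
The ``args`` object above is expected to provide an already open investigation file handle (note the ``.close()`` and ``.name`` accesses). A minimal sketch of building such a namespace with argparse, purely as an assumption for illustration:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "input_investigation_file",
    type=argparse.FileType("rt"),
    help="ISA-tab investigation file (i_*.txt)",
)
args = parser.parse_args(["path/to/i_investigation.txt"])  # hypothetical path
run_warnings_caught(args)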
Example #9
def load_investigation(i_path: typing.Union[str, Path]) -> IsaData:
    """Load investigation information from investigation files.

    Study and assay files are expected to be next to the investigation file.
    """
    i_path = Path(i_path)
    with i_path.open("rt") as i_file:
        investigation = InvestigationReader.from_stream(
            input_file=i_file, filename=i_path.name).read()

    studies = {}
    assays = {}
    for study in investigation.studies:
        with (i_path.parent / study.info.path).open() as s_file:
            studies[study.info.path.name] = StudyReader.from_stream(
                study_id=study.info.path.name, input_file=s_file).read()
            for assay in study.assays:
                with (i_path.parent / assay.path).open() as a_file:
                    assays[assay.path.name] = AssayReader.from_stream(
                        study_id=studies[study.info.path.name].file.name,
                        assay_id=assay.path.name,
                        input_file=a_file,
                    ).read()

    return IsaData(investigation, str(i_path), studies, assays)
Example #10
def test_assay_reader_minimal_assay_iostring2(minimal_investigation_file,
                                              minimal_assay_file):
    # Load investigation (tested elsewhere)
    stringio = io.StringIO(minimal_investigation_file.read())
    investigation = InvestigationReader.from_stream(stringio).read()
    with pytest.warns(IsaWarning) as record:
        InvestigationValidator(investigation).validate()

    # Check warnings
    assert 2 == len(record)

    # Create new assay reader and read from StringIO with no filename indicated
    stringio = io.StringIO(minimal_assay_file.read())
    reader = AssayReader.from_stream("S1", "A1", stringio)
    assert 5 == len(reader.header)

    # Read and validate assay
    assay = reader.read()
    AssayValidator(investigation, investigation.studies[0],
                   investigation.studies[0].assays[0], assay).validate()

    # Check results
    assert str(assay.file) == os.path.normpath("<no file>")
    assert 5 == len(assay.header)
    assert 3 == len(assay.materials)
    assert 1 == len(assay.processes)
    assert 3 == len(assay.arcs)
Example #11
def run(args):
    with open(args.investigation_file, "rt") as inputf:
        investigation = InvestigationReader.from_stream(inputf).read()

    path = os.path.dirname(args.investigation_file)

    print("digraph investigation {", file=args.output_file)
    print('  rankdir = "LR";', file=args.output_file)

    for s, study_info in enumerate(investigation.studies):
        with open(os.path.join(path, study_info.info.path), "rt") as inputf:
            study = StudyReader.from_stream("S{}".format(s + 1), inputf).read()
        print("  /* study {} */".format(study_info.info.path),
              file=args.output_file)
        print("  subgraph clusterStudy{} {{".format(s), file=args.output_file)
        print('    label = "Study: {}"'.format(study_info.info.path),
              file=args.output_file)
        print_dot(study, args.output_file)
        print("  }", file=args.output_file)

        for a, assay_info in enumerate(study_info.assays):
            with open(os.path.join(path, assay_info.path), "rt") as inputf:
                assay = AssayReader.from_stream("S{}".format(s + 1),
                                                "A{}".format(a + 1),
                                                inputf).read()
            print("  /* assay {} */".format(assay_info.path),
                  file=args.output_file)
            print("  subgraph clusterAssayS{}A{} {{".format(s, a),
                  file=args.output_file)
            print('    label = "Assay: {}"'.format(assay_info.path),
                  file=args.output_file)
            print_dot(assay, args.output_file)
            print("  }", file=args.output_file)

    print("}", file=args.output_file)
Example #12
def _parse_write_assert_assay(investigation_file,
                              tmp_path,
                              quote=None,
                              normalize=False,
                              skip=None):
    # Load investigation
    investigation = InvestigationReader.from_stream(investigation_file).read()
    InvestigationValidator(investigation).validate()
    directory = os.path.normpath(os.path.dirname(investigation_file.name))
    # Iterate assays
    for s, study_info in enumerate(investigation.studies):
        for a, assay_info in enumerate(study_info.assays):
            if skip and str(assay_info.path) in skip:
                continue
            # Load assay
            path_in = os.path.join(directory, assay_info.path)
            with open(path_in, "rt") as inputf:
                assay = AssayReader.from_stream("S{}".format(s + 1),
                                                "A{}".format(a + 1),
                                                inputf).read()
            AssayValidator(investigation, study_info, assay_info,
                           assay).validate()
            # Write assay to temporary file
            path_out = tmp_path / assay_info.path
            with open(path_out, "wt", newline="") as file:
                AssayWriter.from_stream(assay, file, quote=quote).write()
            if normalize:
                # Read and write assay again
                path_in = path_out
                with open(path_out, "rt") as inputf:
                    assay = AssayReader.from_stream("S{}".format(s + 1),
                                                    "A{}".format(a + 1),
                                                    inputf).read()
                AssayValidator(investigation, study_info, assay_info,
                               assay).validate()
                path_out = tmp_path / (assay_info.path.name + "_b")
                with open(path_out, "wt", newline="") as file:
                    AssayWriter.from_stream(assay, file, quote=quote).write()
            # Sort and compare input and output
            path_in_s = tmp_path / (assay_info.path.name + ".in.sorted")
            path_out_s = tmp_path / (assay_info.path.name + ".out.sorted")
            assert filecmp.cmp(sort_file(path_in, path_in_s),
                               sort_file(path_out, path_out_s),
                               shallow=False)
Example #13
    def parse_isatab(self):
        """
        parse sample info from ISA-tab table
        """
        logger.info("Parsing ISA-tab...")
        logger.info("Read assay file: %s", self.args.isa_assay.name)

        # read assay
        assay = AssayReader.from_stream("S1", "A1", self.args.isa_assay).read()

        # extract relevant fields
        dummy = Bunch(type="", protocol_ref="")
        sample_info = {}
        arc_map = {a.tail: a.head for a in assay.arcs}
        for m in assay.materials.values():
            if m.type == "Sample Name":
                sample_name = m.name
                if sample_name not in sample_info:
                    sample_info[sample_name] = {}
                key = m.unique_name
                # breakpoint()
                while key in arc_map:
                    key = arc_map[key]
                    if re.match(
                            "Library construction [a-z]*RNA[-_][Ss]eq",
                            assay.processes.get(key, dummy).protocol_ref,
                    ):
                        for p in assay.processes[key].parameter_values:
                            if p.name == "Library layout":
                                sample_info[sample_name]["paired"] = (
                                    True if p.value == "PAIRED" else False)
                            elif p.name == "Library strand-specificity":
                                sample_info[sample_name][
                                    "stranded"] = p.value.lower()
                    elif re.match(
                            "Nucleic acid sequencing [a-z]*RNA[-_][Ss]eq",
                            assay.processes.get(key, dummy).protocol_ref,
                    ):
                        for p in assay.processes[key].parameter_values:
                            if p.name == "Instrument model":
                                sample_info[sample_name][
                                    "instrument"] = ",".join(p.value)
                            elif (p.name == "Platform" and "instrument"
                                  not in sample_info[sample_name]):
                                sample_info[sample_name][
                                    "instrument"] = ",".join(p.value)
                            elif p.name == "Target read length":
                                sample_info[sample_name][
                                    "read_length"] = p.value

        logger.info("Samples in ISA assay:\n%s", ", ".join(sample_info))
        logger.debug(sample_info)

        self.sample_info = sample_info
Example #14
    def parse_isatab(self):
        """
        parse sample info from ISA-tab table
        """
        logger.info("Parsing ISA-tab...")
        logger.info("Read assay file: %s", self.args.isa_assay.name)

        # read assay
        assay = AssayReader.from_stream("S1", "A1", self.args.isa_assay).read()

        # extract relevant fields
        dummy = Bunch(type="", protocol_ref="")
        sample_info = {}
        arc_map = {a.tail: a.head for a in assay.arcs}
        for m in assay.materials.values():
            if m.type == "Sample Name":
                sample_name = m.name
                if sample_name not in sample_info:
                    sample_info[sample_name] = {}
                key = m.unique_name
                # breakpoint()
                while key in arc_map:
                    key = arc_map[key]
                    if re.match(
                            "Library construction [a-z]*RNA[-_]?[Ss]eq",
                            assay.processes.get(key, dummy).protocol_ref,
                    ):
                        self._parse_isatab_library_construction(
                            assay, key, sample_info, sample_name)

                    elif re.match(
                            "Nucleic acid sequencing [a-z]*RNA[-_]?[Ss]eq",
                            assay.processes.get(key, dummy).protocol_ref,
                    ):
                        self._parse_isatab_sequencing(assay, key, sample_info,
                                                      sample_name)

        logger.info("Samples in ISA assay:\n%s", ", ".join(sample_info))
        logger.debug(sample_info)

        self.sample_info = sample_info
Example #15
def test_assay_reader_minimal_assay(minimal_investigation_file,
                                    minimal_assay_file):
    """Use ``AssayReader`` to read in minimal assay file.

    Using the ``AssayReader`` instead of the ``AssayRowReader`` gives us
    ``Assay`` objects instead of just the row-wise nodes.
    """
    # Load investigation (tested elsewhere)
    investigation = InvestigationReader.from_stream(
        minimal_investigation_file).read()
    with pytest.warns(IsaWarning) as record:
        InvestigationValidator(investigation).validate()

    # Check warnings
    assert 1 == len(record)

    # Create new row reader and check read headers
    reader = AssayReader.from_stream("S1", "A1", minimal_assay_file)
    assert 5 == len(reader.header)

    # Read and validate assay
    assay = reader.read()
    AssayValidator(investigation, investigation.studies[0],
                   investigation.studies[0].assays[0], assay).validate()

    # Check results
    assert os.path.normpath(str(assay.file)).endswith(
        os.path.normpath("data/i_minimal/a_minimal.txt"))
    assert 5 == len(assay.header)
    assert 3 == len(assay.materials)
    assert 1 == len(assay.processes)
    assert 3 == len(assay.arcs)

    expected = models.Material(
        "Sample Name",
        "S1-sample-0815-N1",
        "0815-N1",
        None,
        (),
        (),
        (),
        None,
        [table_headers.SAMPLE_NAME],
    )
    assert expected == assay.materials["S1-sample-0815-N1"]
    expected = models.Material(
        "Raw Data File",
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
        "0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
        None,
        (),
        (),
        (),
        None,
        [table_headers.RAW_DATA_FILE],
    )
    assert expected == assay.materials[
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4"]
    expected = models.Material(
        "Raw Data File",
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
        "0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
        None,
        (),
        (),
        (),
        None,
        [table_headers.RAW_DATA_FILE],
    )
    assert expected == assay.materials[
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5"]

    expected = models.Process(
        "nucleic acid sequencing",
        "S1-A1-0815-N1-DNA1-WES1-3",
        "0815-N1-DNA1-WES1",
        "Assay Name",
        None,
        None,
        (),
        (),
        None,
        None,
        None,
        [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )
    assert expected == assay.processes["S1-A1-0815-N1-DNA1-WES1-3"]

    expected = (
        models.Arc("S1-sample-0815-N1", "S1-A1-0815-N1-DNA1-WES1-3"),
        models.Arc("S1-A1-0815-N1-DNA1-WES1-3",
                   "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4"),
        models.Arc(
            "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
            "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
        ),
    )
    assert expected == assay.arcs
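
The ``minimal_investigation_file`` and ``minimal_assay_file`` arguments here and in Example #10 are pytest fixtures that are not shown on this page; they appear to yield open text handles to files under ``data/i_minimal/``. A plausible sketch of such a fixture (the exact location and body are assumptions):

import os

import pytest


@pytest.fixture
def minimal_assay_file():
    # Hypothetical fixture: yields an open handle to the minimal assay file
    # referenced by the assertions above.
    path = os.path.join(os.path.dirname(__file__), "data", "i_minimal", "a_minimal.txt")
    with open(path, "rt") as f:
        yield f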
Example #16
def test_assay_reader_gelelect(gelelect_investigation_file,
                               gelelect_assay_file):
    """Use ``AssayReader`` to read in small assay file."""
    with pytest.warns(IsaWarning) as record:
        # Load investigation
        investigation = InvestigationReader.from_stream(
            gelelect_investigation_file).read()
        InvestigationValidator(investigation).validate()

        # Create new row reader and check read headers
        reader = AssayReader.from_stream("S1", "A1", gelelect_assay_file)
        assert 22 == len(reader.header)

        # Read assay
        assay = reader.read()
        AssayValidator(investigation, investigation.studies[0],
                       investigation.studies[0].assays[0], assay).validate()

    # Check warnings
    assert 4 == len(record)

    # Check results
    assert os.path.normpath(str(assay.file)).endswith(
        os.path.normpath(
            "data/test_gelelect/a_study01_protein_expression_profiling_gel_electrophoresis.txt"
        ))
    assert 22 == len(assay.header)
    assert 9 == len(assay.materials)
    assert 10 == len(assay.processes)
    assert 18 == len(assay.arcs)

    expected = models.Material(
        "Image File",
        "S1-A1-Image01.jpeg-COL19",
        "Image01.jpeg",
        None,
        (),
        (),
        (),
        None,
        [table_headers.IMAGE_FILE],
    )
    assert expected == assay.materials["S1-A1-Image01.jpeg-COL19"]

    expected = models.Process(
        "data collection",
        "S1-A1-Scan02-18",
        "Scan02",
        "Scan Name",
        None,
        None,
        (),
        (),
        None,
        None,
        None,
        [table_headers.PROTOCOL_REF, table_headers.SCAN_NAME],
    )
    assert expected == assay.processes["S1-A1-Scan02-18"]

    header_electrophoresis = [
        table_headers.PROTOCOL_REF,
        table_headers.GEL_ELECTROPHORESIS_ASSAY_NAME,
        table_headers.FIRST_DIMENSION,
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
        table_headers.SECOND_DIMENSION,
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
    ]

    expected = models.Process(
        "electrophoresis",
        "S1-A1-Assay01-10",
        "Assay01",
        "Gel Electrophoresis Assay Name",
        None,
        None,
        (),
        (),
        None,
        models.OntologyTermRef("", "", ""),
        models.OntologyTermRef("", "", ""),
        header_electrophoresis,
    )
    assert expected == assay.processes["S1-A1-Assay01-10"]

    expected = models.Process(
        "electrophoresis",
        "S1-A1-electrophoresis-9-2",
        "",
        "Gel Electrophoresis Assay Name",
        None,
        None,
        (),
        (),
        None,
        models.OntologyTermRef("AssayX", None, None),
        models.OntologyTermRef("AssayY", None, None),
        header_electrophoresis,
    )
    assert expected == assay.processes["S1-A1-electrophoresis-9-2"]
Example #17
def test_assay_reader_small2_assay(small2_investigation_file,
                                   small2_assay_file):
    """Use ``AssayReader`` to read in small assay file."""
    # Load investigation (tested elsewhere)
    investigation = InvestigationReader.from_stream(
        small2_investigation_file).read()
    InvestigationValidator(investigation).validate()

    # Create new row reader and check read headers
    reader = AssayReader.from_stream("S1", "A1", small2_assay_file)
    assert 14 == len(reader.header)

    # Read assay
    assay = reader.read()
    AssayValidator(investigation, investigation.studies[0],
                   investigation.studies[0].assays[0], assay).validate()

    # Check results
    assert os.path.normpath(str(assay.file)).endswith(
        os.path.normpath("data/i_small2/a_small2.txt"))
    assert 14 == len(assay.header)
    assert 25 == len(assay.materials)
    assert 41 == len(assay.processes)
    assert 74 == len(assay.arcs)

    # Comments
    expected = models.Comment(name="Replicate", value="B")
    assert assay.materials["S1-A1-0815-T1-Pro1-B-115-COL5"].comments[
        0] == expected

    # Expected arcs
    expected = (
        models.Arc("S1-sample-0815-N1", "S1-A1-extraction-2-1"),
        models.Arc("S1-sample-0815-T1", "S1-A1-extraction-2-2"),
        models.Arc("S1-A1-extraction-2-1", "S1-A1-0815-N1-Pro1-COL3"),
        models.Arc("S1-A1-extraction-2-2", "S1-A1-0815-T1-Pro1-COL3"),
        models.Arc("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-1"),
        models.Arc("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-2"),
        models.Arc("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-3"),
        models.Arc("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-4"),
        models.Arc("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-5"),
        models.Arc("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-6"),
        models.Arc("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-7"),
        models.Arc("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-8"),
        models.Arc("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-9"),
        models.Arc("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-10"),
        models.Arc("S1-A1-0815-N1-Pro1-COL3", "S1-A1-labeling-4-11"),
        models.Arc("S1-A1-0815-T1-Pro1-COL3", "S1-A1-labeling-4-12"),
        models.Arc("S1-A1-labeling-4-1", "S1-A1-0815-N1-Pro1-A-114-COL5"),
        models.Arc("S1-A1-labeling-4-2", "S1-A1-0815-T1-Pro1-A-115-COL5"),
        models.Arc("S1-A1-labeling-4-3", "S1-A1-0815-N1-Pro1-B-114-COL5"),
        models.Arc("S1-A1-labeling-4-4", "S1-A1-0815-T1-Pro1-B-115-COL5"),
        models.Arc("S1-A1-labeling-4-5", "S1-A1-0815-N1-Pro1-C-114-COL5"),
        models.Arc("S1-A1-labeling-4-6", "S1-A1-0815-T1-Pro1-C-115-COL5"),
        models.Arc("S1-A1-labeling-4-7", "S1-A1-0815-N1-Pro1-D-114-COL5"),
        models.Arc("S1-A1-labeling-4-8", "S1-A1-0815-T1-Pro1-D-115-COL5"),
        models.Arc("S1-A1-labeling-4-9", "S1-A1-0815-N1-Pro1-E-114-COL5"),
        models.Arc("S1-A1-labeling-4-10", "S1-A1-0815-T1-Pro1-E-115-COL5"),
        models.Arc("S1-A1-labeling-4-11", "S1-A1-0815-N1-Pro1-F-114-COL5"),
        models.Arc("S1-A1-labeling-4-12", "S1-A1-0815-T1-Pro1-F-115-COL5"),
        models.Arc("S1-A1-0815-N1-Pro1-A-114-COL5",
                   "S1-A1-chromatography-8-1"),
        models.Arc("S1-A1-0815-T1-Pro1-A-115-COL5",
                   "S1-A1-chromatography-8-2"),
        models.Arc("S1-A1-0815-N1-Pro1-B-114-COL5",
                   "S1-A1-chromatography-8-3"),
        models.Arc("S1-A1-0815-T1-Pro1-B-115-COL5",
                   "S1-A1-chromatography-8-4"),
        models.Arc("S1-A1-0815-N1-Pro1-C-114-COL5",
                   "S1-A1-chromatography-8-5"),
        models.Arc("S1-A1-0815-T1-Pro1-C-115-COL5",
                   "S1-A1-chromatography-8-6"),
        models.Arc("S1-A1-0815-N1-Pro1-D-114-COL5",
                   "S1-A1-chromatography-8-7"),
        models.Arc("S1-A1-0815-T1-Pro1-D-115-COL5",
                   "S1-A1-chromatography-8-8"),
        models.Arc("S1-A1-0815-N1-Pro1-E-114-COL5",
                   "S1-A1-chromatography-8-9"),
        models.Arc("S1-A1-0815-T1-Pro1-E-115-COL5",
                   "S1-A1-chromatography-8-10"),
        models.Arc("S1-A1-0815-N1-Pro1-F-114-COL5",
                   "S1-A1-chromatography-8-11"),
        models.Arc("S1-A1-0815-T1-Pro1-F-115-COL5",
                   "S1-A1-chromatography-8-12"),
        models.Arc("S1-A1-chromatography-8-1", "S1-A1-poolA-10"),
        models.Arc("S1-A1-chromatography-8-2", "S1-A1-poolA-10"),
        models.Arc("S1-A1-chromatography-8-3", "S1-A1-mass spectrometry-9-3"),
        models.Arc("S1-A1-chromatography-8-4", "S1-A1-mass spectrometry-9-4"),
        models.Arc("S1-A1-chromatography-8-5", "S1-A1-poolC-10"),
        models.Arc("S1-A1-chromatography-8-6", "S1-A1-poolC-10"),
        models.Arc("S1-A1-chromatography-8-7", "S1-A1-mass spectrometry-9-7"),
        models.Arc("S1-A1-chromatography-8-8", "S1-A1-mass spectrometry-9-8"),
        models.Arc("S1-A1-chromatography-8-9", "S1-A1-poolE-10"),
        models.Arc("S1-A1-chromatography-8-10", "S1-A1-poolE-10"),
        models.Arc("S1-A1-chromatography-8-11", "S1-A1-poolF-10"),
        models.Arc("S1-A1-chromatography-8-12", "S1-A1-poolF-10"),
        models.Arc("S1-A1-poolA-10", "S1-A1-poolA.raw-COL11"),
        models.Arc("S1-A1-mass spectrometry-9-3", "S1-A1-poolB.raw-COL11"),
        models.Arc("S1-A1-mass spectrometry-9-4", "S1-A1-poolB.raw-COL11"),
        models.Arc("S1-A1-poolC-10",
                   "S1-A1-Empty Raw Spectral Data File-11-5"),
        models.Arc("S1-A1-mass spectrometry-9-7",
                   "S1-A1-Empty Raw Spectral Data File-11-7"),
        models.Arc("S1-A1-mass spectrometry-9-8",
                   "S1-A1-Empty Raw Spectral Data File-11-8"),
        models.Arc("S1-A1-poolE-10", "S1-A1-poolE.raw-COL11"),
        models.Arc("S1-A1-poolF-10",
                   "S1-A1-Empty Raw Spectral Data File-11-11"),
        models.Arc("S1-A1-poolA.raw-COL11", "S1-A1-data transformation-12-1"),
        models.Arc("S1-A1-poolB.raw-COL11", "S1-A1-data transformation-12-3"),
        models.Arc("S1-A1-Empty Raw Spectral Data File-11-5",
                   "S1-A1-data transformation-12-5"),
        models.Arc("S1-A1-Empty Raw Spectral Data File-11-7",
                   "S1-A1-data transformation-12-7"),
        models.Arc("S1-A1-Empty Raw Spectral Data File-11-8",
                   "S1-A1-data transformation-12-8"),
        models.Arc("S1-A1-poolE.raw-COL11", "S1-A1-data transformation-12-9"),
        models.Arc("S1-A1-Empty Raw Spectral Data File-11-11",
                   "S1-A1-data analysis-13"),
        models.Arc("S1-A1-data transformation-12-1",
                   "S1-A1-results.csv-COL14"),
        models.Arc("S1-A1-data transformation-12-3",
                   "S1-A1-results.csv-COL14"),
        models.Arc("S1-A1-data transformation-12-5",
                   "S1-A1-results.csv-COL14"),
        models.Arc("S1-A1-data transformation-12-7",
                   "S1-A1-results.csv-COL14"),
        models.Arc("S1-A1-data transformation-12-8",
                   "S1-A1-results.csv-COL14"),
        models.Arc("S1-A1-data transformation-12-9",
                   "S1-A1-Empty Derived Data File-14-9"),
        models.Arc("S1-A1-data analysis-13", "S1-A1-results.csv-COL14"),
    )
    assert sorted(expected) == sorted(assay.arcs)
Example #18
def test_assay_reader_small_assay(small_investigation_file, small_assay_file):
    """Use ``AssayReader`` to read in small assay file."""
    # Load investigation (tested elsewhere)
    investigation = InvestigationReader.from_stream(
        small_investigation_file).read()
    with pytest.warns(IsaWarning) as record:
        InvestigationValidator(investigation).validate()

    # Check warnings
    assert 1 == len(record)

    # Create new row reader and check read headers
    reader = AssayReader.from_stream("S1", "A1", small_assay_file)
    assert 9 == len(reader.header)

    # Read assay
    with pytest.warns(IsaWarning) as record:
        assay = reader.read()
        AssayValidator(investigation, investigation.studies[0],
                       investigation.studies[0].assays[0], assay).validate()

    # Check warnings
    assert 1 == len(record)

    # Check results
    assert os.path.normpath(str(assay.file)).endswith(
        os.path.normpath("data/i_small/a_small.txt"))
    assert 9 == len(assay.header)
    assert 9 == len(assay.materials)
    assert 5 == len(assay.processes)
    assert 13 == len(assay.arcs)

    expected = models.Material(
        "Sample Name",
        "S1-sample-0815-N1",
        "0815-N1",
        None,
        (),
        (),
        (),
        None,
        [table_headers.SAMPLE_NAME],
    )
    assert expected == assay.materials["S1-sample-0815-N1"]
    expected = models.Material(
        "Sample Name",
        "S1-sample-0815-T1",
        "0815-T1",
        None,
        (),
        (),
        (),
        None,
        [table_headers.SAMPLE_NAME],
    )
    assert expected == assay.materials["S1-sample-0815-T1"]
    expected = models.Material(
        "Raw Data File",
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL6",
        "0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
        None,
        (),
        (),
        (),
        None,
        [table_headers.RAW_DATA_FILE],
    )
    assert expected == assay.materials[
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL6"]
    expected = models.Material(
        "Raw Data File",
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
        "0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
        None,
        (),
        (),
        (),
        None,
        [table_headers.RAW_DATA_FILE],
    )
    assert expected == assay.materials[
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL7"]
    expected = models.Material(
        "Raw Data File",
        "S1-A1-0815-T1-DNA1-WES1_L???_???_R1.fastq.gz-COL6",
        "0815-T1-DNA1-WES1_L???_???_R1.fastq.gz",
        None,
        (),
        (),
        (),
        None,
        [table_headers.RAW_DATA_FILE],
    )
    assert expected == assay.materials[
        "S1-A1-0815-T1-DNA1-WES1_L???_???_R1.fastq.gz-COL6"]
    expected = models.Material(
        "Raw Data File",
        "S1-A1-0815-T1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
        "0815-T1-DNA1-WES1_L???_???_R2.fastq.gz",
        None,
        (),
        (),
        (),
        None,
        [table_headers.RAW_DATA_FILE],
    )
    assert expected == assay.materials[
        "S1-A1-0815-T1-DNA1-WES1_L???_???_R2.fastq.gz-COL7"]
    expected = models.Material(
        "Derived Data File",
        "S1-A1-0815-somatic.vcf.gz-COL9",
        "0815-somatic.vcf.gz",
        None,
        (),
        (),
        (),
        None,
        [table_headers.DERIVED_DATA_FILE],
    )
    assert expected == assay.materials["S1-A1-0815-somatic.vcf.gz-COL9"]

    expected = models.Process(
        "library preparation",
        "S1-A1-library preparation-2-1",
        None,
        None,
        None,
        None,
        (),
        (),
        None,
        None,
        None,
        [table_headers.PROTOCOL_REF],
    )
    assert expected == assay.processes["S1-A1-library preparation-2-1"]
    expected = models.Process(
        "library preparation",
        "S1-A1-library preparation-2-2",
        None,
        None,
        None,
        None,
        (),
        (),
        None,
        None,
        None,
        [table_headers.PROTOCOL_REF],
    )
    assert expected == assay.processes["S1-A1-library preparation-2-2"]
    expected = models.Process(
        "nucleic acid sequencing",
        "S1-A1-0815-N1-DNA1-WES1-5",
        "0815-N1-DNA1-WES1",
        "Assay Name",
        None,
        None,
        (),
        (),
        None,
        None,
        None,
        [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )
    assert expected == assay.processes["S1-A1-0815-N1-DNA1-WES1-5"]
    expected = models.Process(
        "nucleic acid sequencing",
        "S1-A1-0815-T1-DNA1-WES1-5",
        "0815-T1-DNA1-WES1",
        "Assay Name",
        None,
        None,
        (),
        (),
        None,
        None,
        None,
        [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )
    assert expected == assay.processes["S1-A1-0815-T1-DNA1-WES1-5"]

    expected = (
        models.Arc("S1-sample-0815-N1", "S1-A1-library preparation-2-1"),
        models.Arc("S1-A1-library preparation-2-1", "S1-A1-0815-N1-DNA1-COL3"),
        models.Arc("S1-A1-0815-N1-DNA1-COL3", "S1-A1-0815-N1-DNA1-WES1-5"),
        models.Arc("S1-A1-0815-N1-DNA1-WES1-5",
                   "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL6"),
        models.Arc(
            "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL6",
            "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
        ),
        models.Arc("S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
                   "S1-A1-somatic variant calling-1-8"),
        models.Arc("S1-A1-somatic variant calling-1-8",
                   "S1-A1-0815-somatic.vcf.gz-COL9"),
        models.Arc("S1-sample-0815-T1", "S1-A1-library preparation-2-2"),
        models.Arc("S1-A1-library preparation-2-2", "S1-A1-0815-T1-DNA1-COL3"),
        models.Arc("S1-A1-0815-T1-DNA1-COL3", "S1-A1-0815-T1-DNA1-WES1-5"),
        models.Arc("S1-A1-0815-T1-DNA1-WES1-5",
                   "S1-A1-0815-T1-DNA1-WES1_L???_???_R1.fastq.gz-COL6"),
        models.Arc(
            "S1-A1-0815-T1-DNA1-WES1_L???_???_R1.fastq.gz-COL6",
            "S1-A1-0815-T1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
        ),
        models.Arc("S1-A1-0815-T1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
                   "S1-A1-somatic variant calling-1-8"),
    )
    assert expected == assay.arcs
Example #19
def test_header_exception_unknown_header(assay_file_exception_unknown_header):
    with pytest.raises(ParseIsatabException) as excinfo:
        AssayReader.from_stream("S1", "A1",
                                assay_file_exception_unknown_header)
    msg = 'Header "Test Name" unknown, processing unclear'
    assert msg == str(excinfo.value)
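
Taken together, the examples above suggest that header-level problems (unknown, disallowed, or malformed headers, or a missing companion column) are raised while ``AssayReader.from_stream()`` parses the header line, whereas errors that need the complete column list, such as duplicated column types, only surface in these tests once ``.read()`` is called. A minimal sketch of handling both stages (the ``assay_file`` handle is hypothetical):

try:
    reader = AssayReader.from_stream("S1", "A1", assay_file)  # header parsed here
    assay = reader.read()  # table body parsed and the assay graph built here
except ParseIsatabException as e:
    print("Could not parse ISA-tab assay:", e)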