def test_write_BII_I_1_investigation(BII_I_1_investigation_file, tmp_path):
    """Check that writing the BII-I-1 investigation is stable across a round trip.

    The fixture is read and validated, written to a first file, read back from
    that file and written to a second file; both written files must be
    byte-identical.
    """
    # Parse and validate the fixture while recording ISA warnings
    with pytest.warns(IsaWarning) as caught:
        source_reader = InvestigationReader.from_stream(BII_I_1_investigation_file)
        investigation = source_reader.read()
        InvestigationValidator(investigation).validate()

    # Exactly one warning about the empty ontology source is expected
    assert len(caught) == 1
    only_warning = caught[0]
    assert only_warning.category == ParseIsatabWarning
    assert str(only_warning.message) == "Skipping empty ontology source: , , , "

    # First write
    first_copy = tmp_path / "i_investigation.txt"
    with open(first_copy, "wt") as out_handle:
        first_writer = InvestigationWriter.from_stream(
            investigation, out_handle, lineterminator="\n"
        )
        first_writer.write()

    # Read back what was just written
    with open(first_copy, "rt") as in_handle:
        investigation = InvestigationReader.from_stream(in_handle).read()

    # Second write
    second_copy = tmp_path / "i_investigation_2.txt"
    with open(second_copy, "wt") as out_handle:
        second_writer = InvestigationWriter.from_stream(
            investigation, out_handle, lineterminator="\n"
        )
        second_writer.write()

    # Both written files must have identical content
    assert filecmp.cmp(first_copy, second_copy, shallow=False)
def test_write_full2_investigation(full2_investigation_file, tmp_path):
    """Check warnings raised for the "full2" investigation and that it is rewritten unchanged.

    The written file is compared byte-for-byte against the input fixture file.
    """
    # Parse and validate the fixture while recording ISA warnings
    with pytest.warns(IsaWarning) as caught:
        source_reader = InvestigationReader.from_stream(full2_investigation_file)
        investigation = source_reader.read()
        InvestigationValidator(investigation).validate()

    # Expected warnings, in the order they are recorded
    expected_warnings = (
        (
            CriticalIsaValidationWarning,
            "Study with incomplete minimal information (ID and path):\nID:\t\nTitle:\t\nPath:\t",
        ),
        (
            ModerateIsaValidationWarning,
            "Study without title:\nID:\t\nTitle:\t\nPath:\t",
        ),
        (
            CriticalIsaValidationWarning,
            "Assay with incomplete minimal information (path, measurement and technology type):\n"
            "Path:\t\n"
            "Measurement Type:\tmetabolite profiling\n"
            "Technology Type:\tmass spectrometry\n"
            "Technology Platform:\tLC-MS/MS",
        ),
    )
    assert len(caught) == 3
    for position, (category, message) in enumerate(expected_warnings):
        entry = caught[position]
        assert entry.category == category
        assert str(entry.message) == message

    # Write the investigation to a temporary file
    written = tmp_path / "i_fullinvest2.txt"
    with open(written, "wt") as out_handle:
        writer = InvestigationWriter.from_stream(investigation, out_handle, lineterminator="\n")
        writer.write()

    # Output must equal the input fixture
    assert filecmp.cmp(full2_investigation_file.name, written, shallow=False)
def _perform_update(self, isa, ped_donors):
    """Update the ISA-tab data with the given pedigree donors and write it back.

    The investigation, studies and assays of ``isa`` are traversed with a
    ``SheetUpdateVisitor``; donors whose name the visitor did not see as a
    source are then appended via ``isa_germline_append_donors``.  The updated
    investigation, study and assay files are rendered into memory and handed
    to ``overwrite_helper`` one by one.

    :param isa: ISA data object providing ``investigation``, ``studies`` and
        ``assays``; it is not modified, an evolved copy is created instead.
    :param ped_donors: Donors passed to ``self._build_donor_map``.
    """
    # Traverse investigation, studies, assays, potentially updating the nodes.
    donor_map = self._build_donor_map(ped_donors)
    visitor = SheetUpdateVisitor(donor_map, self.config)
    iwalker = isa_support.InvestigationTraversal(isa.investigation, isa.studies, isa.assays)
    iwalker.run(visitor)
    investigation, studies, assays = iwalker.build_evolved()

    # Add records to study and assay for donors not seen so far.
    todo_ped_donors = [
        donor for donor in donor_map.values() if donor.name not in visitor.seen_source_names
    ]
    studies, assays = isa_germline_append_donors(
        studies, assays, tuple(todo_ped_donors), tuple(visitor.seen_sample_names), self.config
    )
    new_isa = attr.evolve(isa, investigation=investigation, studies=studies, assays=assays)

    # Write ISA-tab into string buffers.  The investigation is taken from
    # ``new_isa`` (not the original ``isa``) so that it stays consistent with
    # the evolved studies and assays written below.
    io_investigation = io.StringIO()
    InvestigationWriter.from_stream(new_isa.investigation, io_investigation).write()
    ios_studies = {}
    for name, study in new_isa.studies.items():
        ios_studies[name] = io.StringIO()
        StudyWriter.from_stream(study, ios_studies[name]).write()
    ios_assays = {}
    for name, assay in new_isa.assays.items():
        ios_assays[name] = io.StringIO()
        AssayWriter.from_stream(assay, ios_assays[name]).write()

    # Collect the outputs in the order investigation, studies, assays.  Study
    # and assay files are placed next to the investigation file.
    i_path = pathlib.Path(self.config.input_investigation_file)
    outputs = [(i_path, io_investigation)]
    outputs.extend((i_path.parent / filename, buf) for filename, buf in ios_studies.items())
    outputs.extend((i_path.parent / filename, buf) for filename, buf in ios_assays.items())

    # Write out updated ISA-tab files using the diff helper.
    for out_path, buf in outputs:
        overwrite_helper(
            out_path,
            buf.getvalue(),
            do_write=not self.config.dry_run,
            show_diff=True,
            show_diff_side_by_side=self.config.show_diff_side_by_side,
            answer_yes=self.config.yes,
        )
def _perform_update(self, isa, annotation_map, header_map):
    """Apply annotations to the ISA-tab data and write the result back.

    The investigation, studies and assays of ``isa`` are traversed with a
    ``SheetUpdateVisitor`` configured from ``annotation_map``, ``header_map``
    and the ``force_update`` / ``target_study`` / ``target_assay`` settings
    of ``self.config``.  The updated investigation, study and assay files are
    rendered into memory and handed to ``overwrite_helper`` one by one.

    :param isa: ISA data object providing ``investigation``, ``studies`` and
        ``assays``; it is not modified, an evolved copy is created instead.
    :param annotation_map: Passed through to ``SheetUpdateVisitor``.
    :param header_map: Passed through to ``SheetUpdateVisitor``.
    """
    # Traverse investigation, studies, assays, potentially updating the nodes.
    visitor = SheetUpdateVisitor(
        annotation_map,
        header_map,
        self.config.force_update,
        self.config.target_study,
        self.config.target_assay,
    )
    iwalker = isa_support.InvestigationTraversal(isa.investigation, isa.studies, isa.assays)
    iwalker.run(visitor)
    investigation, studies, assays = iwalker.build_evolved()
    new_isa = attr.evolve(isa, investigation=investigation, studies=studies, assays=assays)

    # Write ISA-tab into string buffers.  The investigation is taken from
    # ``new_isa`` (not the original ``isa``) so that it stays consistent with
    # the evolved studies and assays written below.
    io_investigation = io.StringIO()
    InvestigationWriter.from_stream(new_isa.investigation, io_investigation).write()
    ios_studies = {}
    for name, study in new_isa.studies.items():
        ios_studies[name] = io.StringIO()
        StudyWriter.from_stream(study, ios_studies[name]).write()
    ios_assays = {}
    for name, assay in new_isa.assays.items():
        ios_assays[name] = io.StringIO()
        AssayWriter.from_stream(assay, ios_assays[name]).write()

    # Collect the outputs in the order investigation, studies, assays.  Study
    # and assay files are placed next to the investigation file.
    i_path = pathlib.Path(self.config.input_investigation_file)
    outputs = [(i_path, io_investigation)]
    outputs.extend((i_path.parent / filename, buf) for filename, buf in ios_studies.items())
    outputs.extend((i_path.parent / filename, buf) for filename, buf in ios_assays.items())

    # Write out updated ISA-tab files using the diff helper.
    for out_path, buf in outputs:
        overwrite_helper(
            out_path,
            buf.getvalue(),
            do_write=not self.config.dry_run,
            show_diff=True,
            show_diff_side_by_side=self.config.show_diff_side_by_side,
            answer_yes=self.config.yes,
        )
def test_write_comment_investigation(comment_investigation_file, tmp_path):
    """Check that the investigation with comments is written back identical to its input."""
    # Parse and validate the fixture
    source_reader = InvestigationReader.from_stream(comment_investigation_file)
    investigation = source_reader.read()
    InvestigationValidator(investigation).validate()

    # Write the investigation to a temporary file
    written = tmp_path / "i_comments.txt"
    with open(written, "wt") as out_handle:
        writer = InvestigationWriter.from_stream(investigation, out_handle, lineterminator="\n")
        writer.write()

    # Output must equal the input fixture
    assert filecmp.cmp(comment_investigation_file.name, written, shallow=False)
def run_writing(args, path_out, investigation, studies, assays):
    """Write the investigation and all of its studies and assays.

    When ``args.output_investigation_file`` is named ``"<stdout>"``, everything
    is written to that stream.  Otherwise the investigation goes to the file of
    that name, and each study/assay goes to its own path below ``path_out``.
    Studies and assays without a path are skipped.

    :param args: Provides ``output_investigation_file`` and ``quotes``.
    :param path_out: Directory that study and assay paths are joined onto.
    :param investigation: Investigation whose ``studies`` are iterated.
    :param studies: Study objects, indexed by study position.
    :param assays: Assay objects, indexed by study position, then assay position.
    """
    target = args.output_investigation_file
    to_stdout = target.name == "<stdout>"

    # Write investigation
    if to_stdout:
        InvestigationWriter.from_stream(investigation, target, quote=args.quotes).write()
    else:
        with open(target.name, "wt", newline="") as outputf:
            InvestigationWriter.from_stream(investigation, outputf, quote=args.quotes).write()

    # Write studies and assays
    for study_idx, study_info in enumerate(investigation.studies):
        study_path = study_info.info.path

        if to_stdout:
            if study_path:
                StudyWriter.from_stream(studies[study_idx], target, quote=args.quotes).write()
            for assay_idx, assay_info in enumerate(study_info.assays):
                if not assay_info.path:
                    continue
                AssayWriter.from_stream(
                    assays[study_idx][assay_idx], target, quote=args.quotes
                ).write()
            continue

        if study_path:
            with open(os.path.join(path_out, study_path), "wt", newline="") as outputf:
                StudyWriter.from_stream(studies[study_idx], outputf, quote=args.quotes).write()
        for assay_idx, assay_info in enumerate(study_info.assays):
            if not assay_info.path:
                continue
            with open(os.path.join(path_out, assay_info.path), "wt", newline="") as outputf:
                AssayWriter.from_stream(
                    assays[study_idx][assay_idx], outputf, quote=args.quotes
                ).write()
def test_write_minimal_investigation(minimal_investigation_file, tmp_path):
    """Check warnings for the minimal investigation and that it is rewritten unchanged.

    One warning is expected while reading/validating and six while writing;
    the written file is compared byte-for-byte against the input fixture file.
    """
    # Parse and validate the fixture while recording ISA warnings
    with pytest.warns(IsaWarning) as read_warnings:
        source_reader = InvestigationReader.from_stream(minimal_investigation_file)
        investigation = source_reader.read()
        InvestigationValidator(investigation).validate()
    assert len(read_warnings) == 1

    # Write the investigation to a temporary file while recording ISA warnings
    written = tmp_path / "i_minimal.txt"
    with pytest.warns(IsaWarning) as write_warnings:
        with open(written, "wt") as out_handle:
            writer = InvestigationWriter.from_stream(
                investigation, out_handle, lineterminator="\n"
            )
            writer.write()

    # Six warnings are expected; the fourth one is checked in detail
    assert len(write_warnings) == 6
    fourth = write_warnings[3]
    assert fourth.category == WriteIsatabWarning
    assert str(fourth.message) == (
        "No reference headers available for section STUDY PUBLICATIONS. Applying default order."
    )

    # Output must equal the input fixture
    assert filecmp.cmp(minimal_investigation_file.name, written, shallow=False)
def test_write_full_investigation(full_investigation_file, tmp_path):
    """Check that writing the full investigation is stable across a round trip.

    The fixture is read and validated, written to a first file, read back from
    that file and written to a second file; both written files must be
    byte-identical.
    """

    def dump(inv, destination):
        # Serialise ``inv`` to ``destination`` with "\n" line endings.
        with open(destination, "wt") as out_handle:
            InvestigationWriter.from_stream(inv, out_handle, lineterminator="\n").write()

    # Parse and validate the fixture
    investigation = InvestigationReader.from_stream(full_investigation_file).read()
    InvestigationValidator(investigation).validate()

    # First write
    first_copy = tmp_path / "i_fullinvest.txt"
    dump(investigation, first_copy)

    # Read back what was just written
    with open(first_copy, "rt") as in_handle:
        investigation = InvestigationReader.from_stream(in_handle).read()

    # Second write
    second_copy = tmp_path / "i_fullinvest_2.txt"
    dump(investigation, second_copy)

    # Both written files must have identical content
    assert filecmp.cmp(first_copy, second_copy, shallow=False)
def test_write_assays_investigation(assays_investigation_file, tmp_path):
    """Check warning counts and round-trip stability for the "assays" investigation.

    Validation is expected to record 5 warnings and writing 12 warnings, both
    for the original fixture and for the investigation re-read from the first
    written file.  The two written files must be byte-identical.
    """
    # Parse the fixture, then validate it while recording ISA warnings
    investigation = InvestigationReader.from_stream(assays_investigation_file).read()
    with pytest.warns(IsaWarning) as validate_warnings:
        InvestigationValidator(investigation).validate()
    assert len(validate_warnings) == 5

    # First write, recording ISA warnings
    first_copy = tmp_path / "i_assays.txt"
    with pytest.warns(IsaWarning) as write_warnings:
        with open(first_copy, "wt") as out_handle:
            InvestigationWriter.from_stream(
                investigation, out_handle, lineterminator="\n"
            ).write()
    assert len(write_warnings) == 12

    # Read back what was just written and validate again
    with open(first_copy, "rt") as in_handle:
        investigation = InvestigationReader.from_stream(in_handle).read()
    with pytest.warns(IsaWarning) as revalidate_warnings:
        InvestigationValidator(investigation).validate()
    assert len(revalidate_warnings) == 5

    # Second write, recording ISA warnings
    second_copy = tmp_path / "i_assays_2.txt"
    with pytest.warns(IsaWarning) as rewrite_warnings:
        with open(second_copy, "wt") as out_handle:
            InvestigationWriter.from_stream(
                investigation, out_handle, lineterminator="\n"
            ).write()
    assert len(rewrite_warnings) == 12

    # Both written files must have identical content
    assert filecmp.cmp(first_copy, second_copy, shallow=False)
def create_and_write(out_path):
    """Create an investigation with one study and one assay and write them below ``out_path``.

    Three ISA-Tab files are written into ``out_path``: ``i_minimal.txt``
    (investigation), ``s_minimal.txt`` (study) and ``a_minimal.txt`` (assay).
    Each model is validated right before it is written.
    """

    def make_protocol(name):
        # Protocol whose type is an ontology term carrying the same name.
        return models.ProtocolInfo(
            name=name,
            type=models.OntologyTermRef(name=name),
            description=None,
            uri=None,
            version=None,
            parameters={},
            components={},
            comments=(),
            headers=[],
        )

    def make_material(node_type, unique_name, name, header):
        # Material node without annotations and with a single column header.
        return models.Material(
            type=node_type,
            unique_name=unique_name,
            name=name,
            extract_label=None,
            characteristics=(),
            comments=(),
            factor_values=(),
            material_type=None,
            headers=[header],
        )

    # ---- Investigation -------------------------------------------------------

    # Basic study information
    study_basics = models.BasicInfo(
        path="s_minimal.txt",
        identifier="s_minimal",
        title="Germline Study",
        description=None,
        submission_date=None,
        public_release_date=None,
        comments=(
            models.Comment(name="Study Grant Number", value=None),
            models.Comment(name="Study Funding Agency", value=None),
        ),
        headers=[],
    )

    # The single assay of the study
    exome_assay_info = models.AssayInfo(
        measurement_type=models.OntologyTermRef(
            name="exome sequencing assay",
            accession="http://purl.obolibrary.org/obo/OBI_0002118",
            ontology_name="OBI",
        ),
        technology_type=models.OntologyTermRef(
            name="nucleotide sequencing",
            accession="http://purl.obolibrary.org/obo/OBI_0000626",
            ontology_name="OBI",
        ),
        platform=None,
        path="a_minimal.txt",
        comments=(),
        headers=[],
    )

    # Protocols referenced by the study and assay graphs below
    collection_protocol = make_protocol("sample collection")
    sequencing_protocol = make_protocol("nucleic acid sequencing")

    # Study section of the investigation
    study_section = models.StudyInfo(
        info=study_basics,
        designs=(),
        publications=(),
        factors={},
        assays=(exome_assay_info,),
        protocols={
            protocol.name: protocol for protocol in (collection_protocol, sequencing_protocol)
        },
        contacts=(),
    )

    # Ontology source reference
    obi_source = models.OntologyRef(
        name="OBI",
        file="http://data.bioontology.org/ontologies/OBI",
        version="31",
        description="Ontology for Biomedical Investigations",
        comments=(),
        headers=[],
    )

    # Basic investigation information
    investigation_basics = models.BasicInfo(
        path="i_minimal.txt",
        identifier="i_minimal",
        title="Minimal Investigation",
        description=None,
        submission_date=None,
        public_release_date=None,
        comments=(),
        headers=[],
    )

    investigation = models.InvestigationInfo(
        ontology_source_refs={obi_source.name: obi_source},
        info=investigation_basics,
        publications=(),
        contacts=(),
        studies=(study_section,),
    )

    # Validate, then write the investigation as ISA-Tab txt file
    InvestigationValidator(investigation).validate()
    with open(join(out_path, investigation.info.path), "wt", newline="") as outputf:
        InvestigationWriter.from_stream(investigation=investigation, output_file=outputf).write()

    # Shortcuts to the sections the study and assay graphs refer to
    first_study_info = investigation.studies[0]
    first_assay_info = first_study_info.assays[0]

    # ---- Study graph ---------------------------------------------------------

    # One source, one sample and one collection process; unique names are
    # required for unambiguous node identification.
    source_node = make_material(
        "Source Name", "S1-source-0815", "0815", table_headers.SOURCE_NAME
    )
    study_sample_node = make_material(
        "Sample Name", "S1-sample-0815-N1", "0815-N1", table_headers.SAMPLE_NAME
    )
    collection_process = models.Process(
        protocol_ref="sample collection",
        unique_name="S1-sample collection-2-1",
        name=None,
        name_type=None,
        date=None,
        performer=None,
        parameter_values=(),
        comments=(),
        array_design_ref=None,
        first_dimension=None,
        second_dimension=None,
        headers=[table_headers.PROTOCOL_REF],
    )

    # Arcs connect nodes by unique name: source -> collection -> sample
    study_arcs = (
        models.Arc(tail=source_node.unique_name, head=collection_process.unique_name),
        models.Arc(tail=collection_process.unique_name, head=study_sample_node.unique_name),
    )

    study_graph = models.Study(
        file=first_study_info.info.path,
        header=None,
        materials={
            node.unique_name: node for node in (source_node, study_sample_node)
        },
        processes={collection_process.unique_name: collection_process},
        arcs=study_arcs,
    )

    # Validate, then write the study as ISA-Tab txt file
    StudyValidator(
        investigation=investigation, study_info=first_study_info, study=study_graph
    ).validate()
    with open(join(out_path, first_study_info.info.path), "wt", newline="") as outputf:
        StudyWriter.from_stream(study_or_assay=study_graph, output_file=outputf).write()

    # ---- Assay graph ---------------------------------------------------------

    # One sample, two raw data files and one sequencing process; every node
    # carries an explicit header definition, which the export currently needs.
    assay_sample_node = make_material(
        "Sample Name", "S1-sample-0815-N1", "0815-N1", table_headers.SAMPLE_NAME
    )
    read1_file = make_material(
        "Raw Data File",
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
        "0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
        table_headers.RAW_DATA_FILE,
    )
    read2_file = make_material(
        "Raw Data File",
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
        "0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
        table_headers.RAW_DATA_FILE,
    )
    sequencing_process = models.Process(
        protocol_ref="nucleic acid sequencing",
        unique_name="S1-A1-0815-N1-DNA1-WES1-3",
        name="0815-N1-DNA1-WES1",
        name_type="Assay Name",
        date=None,
        performer=None,
        parameter_values=(),
        comments=(),
        array_design_ref=None,
        first_dimension=None,
        second_dimension=None,
        headers=[table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )

    # Arcs connect nodes by unique name: sample -> sequencing -> R1 file -> R2 file
    assay_arcs = (
        models.Arc(tail=assay_sample_node.unique_name, head=sequencing_process.unique_name),
        models.Arc(tail=sequencing_process.unique_name, head=read1_file.unique_name),
        models.Arc(tail=read1_file.unique_name, head=read2_file.unique_name),
    )

    assay_graph = models.Assay(
        file=first_assay_info.path,
        header=None,
        materials={
            node.unique_name: node for node in (assay_sample_node, read1_file, read2_file)
        },
        processes={sequencing_process.unique_name: sequencing_process},
        arcs=assay_arcs,
    )

    # Validate, then write the assay as ISA-Tab txt file
    AssayValidator(
        investigation=investigation,
        study_info=first_study_info,
        assay_info=first_assay_info,
        assay=assay_graph,
    ).validate()
    with open(join(out_path, first_assay_info.path), "wt", newline="") as outputf:
        AssayWriter.from_stream(study_or_assay=assay_graph, output_file=outputf).write()