def test_study_row_reader_minimal_study(minimal_investigation_file, minimal_study_file):
    """Read the minimal study file row by row via ``StudyRowReader``.

    The file is expected to yield a single row consisting of a source, a
    sample collection process and a sample.
    """
    # Build the reader; the header is inspected before any row is read
    reader = StudyRowReader.from_stream("S1", minimal_study_file)
    assert len(reader.header) == 3

    # Collect every row of the study
    all_rows = list(reader.read())
    assert len(all_rows) == 1
    row = all_rows[0]
    assert len(row) == 3

    # Nodes expected in the row, in column order
    expected_nodes = [
        models.Material(
            "Source Name",
            "S1-source-0815",
            "0815",
            None,
            (),
            (),
            (),
            None,
            [table_headers.SOURCE_NAME],
        ),
        models.Process(
            "sample collection",
            "S1-sample collection-2-1",
            None,
            None,
            None,
            None,
            (),
            (),
            None,
            None,
            None,
            [table_headers.PROTOCOL_REF],
        ),
        models.Material(
            "Sample Name",
            "S1-sample-0815-N1",
            "0815-N1",
            None,
            (),
            (),
            (),
            None,
            [table_headers.SAMPLE_NAME],
        ),
    ]
    for position, node in enumerate(expected_nodes):
        assert node == row[position]
def test_assay_reader_minimal_assay(minimal_investigation_file, minimal_assay_file):
    """Read the minimal assay file into an ``Assay`` object via ``AssayReader``.

    Unlike ``AssayRowReader``, which only yields the row-wise nodes, the
    ``AssayReader`` result exposes materials, processes and arcs.
    """
    # Parsing the investigation itself is covered by other tests
    investigation = InvestigationReader.from_stream(minimal_investigation_file).read()
    with pytest.warns(IsaWarning) as caught:
        InvestigationValidator(investigation).validate()
    # NOTE(review): test_study_reader_minimal_study validates the same fixture
    # and expects 2 warnings -- confirm which count is current.
    assert len(caught) == 1

    # The header is inspected before the assay body is read
    reader = AssayReader.from_stream("S1", "A1", minimal_assay_file)
    assert len(reader.header) == 5

    # Read the assay and validate it against its investigation entries
    assay = reader.read()
    study_info = investigation.studies[0]
    AssayValidator(investigation, study_info, study_info.assays[0], assay).validate()

    # File name and overall sizes
    actual_path = os.path.normpath(str(assay.file))
    assert actual_path.endswith(os.path.normpath("data/i_minimal/a_minimal.txt"))
    assert len(assay.header) == 5
    assert len(assay.materials) == 3
    assert len(assay.processes) == 1
    assert len(assay.arcs) == 3

    # Unique node names used as lookup keys and as arc end points
    sample_id = "S1-sample-0815-N1"
    read1_id = "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4"
    read2_id = "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5"
    sequencing_id = "S1-A1-0815-N1-DNA1-WES1-3"

    # Materials: (type, unique name, name, header)
    material_specs = [
        ("Sample Name", sample_id, "0815-N1", table_headers.SAMPLE_NAME),
        (
            "Raw Data File",
            read1_id,
            "0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
            table_headers.RAW_DATA_FILE,
        ),
        (
            "Raw Data File",
            read2_id,
            "0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
            table_headers.RAW_DATA_FILE,
        ),
    ]
    for kind, unique_name, name, header in material_specs:
        expected = models.Material(kind, unique_name, name, None, (), (), (), None, [header])
        assert expected == assay.materials[unique_name]

    # The only process
    expected = models.Process(
        "nucleic acid sequencing",
        sequencing_id,
        "0815-N1-DNA1-WES1",
        "Assay Name",
        None,
        None,
        (),
        (),
        None,
        None,
        None,
        [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )
    assert expected == assay.processes[sequencing_id]

    # Arcs connect consecutive nodes: sample -> sequencing -> R1 -> R2
    node_chain = (sample_id, sequencing_id, read1_id, read2_id)
    expected = tuple(
        models.Arc(tail, head) for tail, head in zip(node_chain, node_chain[1:])
    )
    assert expected == assay.arcs
def test_assay_reader_gelelect(gelelect_investigation_file, gelelect_assay_file):
    """Read the gel electrophoresis assay file via ``AssayReader``.

    Loading, validation and assay parsing all run inside one
    ``pytest.warns`` context, so the warning count covers all of them.
    """
    with pytest.warns(IsaWarning) as caught:
        # Load and validate the investigation
        investigation = InvestigationReader.from_stream(gelelect_investigation_file).read()
        InvestigationValidator(investigation).validate()

        # The header is inspected before the assay body is read
        reader = AssayReader.from_stream("S1", "A1", gelelect_assay_file)
        assert len(reader.header) == 22

        # Read the assay and validate it against its investigation entries
        assay = reader.read()
        study_info = investigation.studies[0]
        AssayValidator(investigation, study_info, study_info.assays[0], assay).validate()

    assert len(caught) == 4

    # File name and overall sizes
    actual_path = os.path.normpath(str(assay.file))
    expected_suffix = os.path.normpath(
        "data/test_gelelect/a_study01_protein_expression_profiling_gel_electrophoresis.txt"
    )
    assert actual_path.endswith(expected_suffix)
    assert len(assay.header) == 22
    assert len(assay.materials) == 9
    assert len(assay.processes) == 10
    assert len(assay.arcs) == 18

    # Spot-check one material
    image_id = "S1-A1-Image01.jpeg-COL19"
    expected = models.Material(
        "Image File",
        image_id,
        "Image01.jpeg",
        None,
        (),
        (),
        (),
        None,
        [table_headers.IMAGE_FILE],
    )
    assert expected == assay.materials[image_id]

    # Spot-check a process named through "Scan Name"
    scan_id = "S1-A1-Scan02-18"
    expected = models.Process(
        "data collection",
        scan_id,
        "Scan02",
        "Scan Name",
        None,
        None,
        (),
        (),
        None,
        None,
        None,
        [table_headers.PROTOCOL_REF, table_headers.SCAN_NAME],
    )
    assert expected == assay.processes[scan_id]

    # Electrophoresis processes carry first/second dimension term references
    header_electrophoresis = [
        table_headers.PROTOCOL_REF,
        table_headers.GEL_ELECTROPHORESIS_ASSAY_NAME,
        table_headers.FIRST_DIMENSION,
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
        table_headers.SECOND_DIMENSION,
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
    ]
    # (unique name, name, first dimension, second dimension)
    electrophoresis_specs = [
        (
            "S1-A1-Assay01-10",
            "Assay01",
            models.OntologyTermRef("", "", ""),
            models.OntologyTermRef("", "", ""),
        ),
        (
            "S1-A1-electrophoresis-9-2",
            "",
            models.OntologyTermRef("AssayX", None, None),
            models.OntologyTermRef("AssayY", None, None),
        ),
    ]
    for unique_name, name, first_dimension, second_dimension in electrophoresis_specs:
        expected = models.Process(
            "electrophoresis",
            unique_name,
            name,
            "Gel Electrophoresis Assay Name",
            None,
            None,
            (),
            (),
            None,
            first_dimension,
            second_dimension,
            header_electrophoresis,
        )
        assert expected == assay.processes[unique_name]
def test_assay_reader_small_assay(small_investigation_file, small_assay_file):
    """Read the small assay file into an ``Assay`` object via ``AssayReader``.

    Checks the overall sizes, a selection of materials and processes, and
    the complete tuple of arcs.
    """

    def arcs_along(*node_names):
        """Return arcs linking each given node name to the one after it."""
        return tuple(
            models.Arc(tail, head) for tail, head in zip(node_names, node_names[1:])
        )

    # Parsing the investigation itself is covered by other tests
    investigation = InvestigationReader.from_stream(small_investigation_file).read()
    with pytest.warns(IsaWarning) as caught:
        InvestigationValidator(investigation).validate()
    assert len(caught) == 1

    # The header is inspected before the assay body is read
    reader = AssayReader.from_stream("S1", "A1", small_assay_file)
    assert len(reader.header) == 9

    # Reading and validating the assay is expected to warn exactly once
    with pytest.warns(IsaWarning) as caught:
        assay = reader.read()
        study_info = investigation.studies[0]
        AssayValidator(investigation, study_info, study_info.assays[0], assay).validate()
    assert len(caught) == 1

    # File name and overall sizes
    actual_path = os.path.normpath(str(assay.file))
    assert actual_path.endswith(os.path.normpath("data/i_small/a_small.txt"))
    assert len(assay.header) == 9
    assert len(assay.materials) == 9
    assert len(assay.processes) == 5
    assert len(assay.arcs) == 13

    # Materials: (type, unique name, name, header)
    material_specs = [
        ("Sample Name", "S1-sample-0815-N1", "0815-N1", table_headers.SAMPLE_NAME),
        ("Sample Name", "S1-sample-0815-T1", "0815-T1", table_headers.SAMPLE_NAME),
        (
            "Raw Data File",
            "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL6",
            "0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
            table_headers.RAW_DATA_FILE,
        ),
        (
            "Raw Data File",
            "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
            "0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
            table_headers.RAW_DATA_FILE,
        ),
        (
            "Raw Data File",
            "S1-A1-0815-T1-DNA1-WES1_L???_???_R1.fastq.gz-COL6",
            "0815-T1-DNA1-WES1_L???_???_R1.fastq.gz",
            table_headers.RAW_DATA_FILE,
        ),
        (
            "Raw Data File",
            "S1-A1-0815-T1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
            "0815-T1-DNA1-WES1_L???_???_R2.fastq.gz",
            table_headers.RAW_DATA_FILE,
        ),
        (
            "Derived Data File",
            "S1-A1-0815-somatic.vcf.gz-COL9",
            "0815-somatic.vcf.gz",
            table_headers.DERIVED_DATA_FILE,
        ),
    ]
    for kind, unique_name, name, header in material_specs:
        expected = models.Material(kind, unique_name, name, None, (), (), (), None, [header])
        assert expected == assay.materials[unique_name]

    # Processes: (protocol ref, unique name, name, name type, headers)
    process_specs = [
        (
            "library preparation",
            "S1-A1-library preparation-2-1",
            None,
            None,
            [table_headers.PROTOCOL_REF],
        ),
        (
            "library preparation",
            "S1-A1-library preparation-2-2",
            None,
            None,
            [table_headers.PROTOCOL_REF],
        ),
        (
            "nucleic acid sequencing",
            "S1-A1-0815-N1-DNA1-WES1-5",
            "0815-N1-DNA1-WES1",
            "Assay Name",
            [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
        ),
        (
            "nucleic acid sequencing",
            "S1-A1-0815-T1-DNA1-WES1-5",
            "0815-T1-DNA1-WES1",
            "Assay Name",
            [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
        ),
    ]
    for protocol_ref, unique_name, name, name_type, headers in process_specs:
        expected = models.Process(
            protocol_ref,
            unique_name,
            name,
            name_type,
            None,
            None,
            (),
            (),
            None,
            None,
            None,
            headers,
        )
        assert expected == assay.processes[unique_name]

    # Arcs: the N1 path runs through to the VCF file, the T1 path ends at
    # the shared variant calling node.
    expected = arcs_along(
        "S1-sample-0815-N1",
        "S1-A1-library preparation-2-1",
        "S1-A1-0815-N1-DNA1-COL3",
        "S1-A1-0815-N1-DNA1-WES1-5",
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL6",
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
        "S1-A1-somatic variant calling-1-8",
        "S1-A1-0815-somatic.vcf.gz-COL9",
    ) + arcs_along(
        "S1-sample-0815-T1",
        "S1-A1-library preparation-2-2",
        "S1-A1-0815-T1-DNA1-COL3",
        "S1-A1-0815-T1-DNA1-WES1-5",
        "S1-A1-0815-T1-DNA1-WES1_L???_???_R1.fastq.gz-COL6",
        "S1-A1-0815-T1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
        "S1-A1-somatic variant calling-1-8",
    )
    assert expected == assay.arcs
def test_assay_row_reader_small_assay(small_investigation_file, small_assay_file):
    """Read the small assay file row by row via ``AssayRowReader``.

    Both rows are expected to contain eight nodes each; every node is
    compared against a freshly built model object.
    """

    def material(kind, unique_name, name, header):
        """Build a material that has only a type, names and one header."""
        return models.Material(kind, unique_name, name, None, (), (), (), None, [header])

    def process(protocol_ref, unique_name, name, name_type, headers):
        """Build a process without date, performer, parameters or comments."""
        return models.Process(
            protocol_ref,
            unique_name,
            name,
            name_type,
            None,
            None,
            (),
            (),
            None,
            None,
            None,
            headers,
        )

    # Build the reader; the header is inspected before any row is read
    reader = AssayRowReader.from_stream("S1", "A1", small_assay_file)
    assert len(reader.header) == 9

    # Collect every row of the assay
    all_rows = list(reader.read())
    assert len(all_rows) == 2
    normal_row = all_rows[0]
    tumor_row = all_rows[1]

    # First row (sample 0815-N1)
    assert len(normal_row) == 8
    expected_normal = [
        material("Sample Name", "S1-sample-0815-N1", "0815-N1", table_headers.SAMPLE_NAME),
        process(
            "library preparation",
            "S1-A1-library preparation-2-1",
            None,
            None,
            [table_headers.PROTOCOL_REF],
        ),
        material(
            "Library Name",
            "S1-A1-0815-N1-DNA1-COL3",
            "0815-N1-DNA1",
            table_headers.LIBRARY_NAME,
        ),
        process(
            "nucleic acid sequencing",
            "S1-A1-0815-N1-DNA1-WES1-5",
            "0815-N1-DNA1-WES1",
            "Assay Name",
            [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
        ),
        material(
            "Raw Data File",
            "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL6",
            "0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
            table_headers.RAW_DATA_FILE,
        ),
        material(
            "Raw Data File",
            "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
            "0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
            table_headers.RAW_DATA_FILE,
        ),
        process(
            "Unknown",
            "S1-A1-somatic variant calling-1-8",
            "somatic variant calling-1",
            "Data Transformation Name",
            [table_headers.DATA_TRANSFORMATION_NAME],
        ),
        material(
            "Derived Data File",
            "S1-A1-0815-somatic.vcf.gz-COL9",
            "0815-somatic.vcf.gz",
            table_headers.DERIVED_DATA_FILE,
        ),
    ]
    for position, node in enumerate(expected_normal):
        assert node == normal_row[position]

    # Second row (sample 0815-T1)
    assert len(tumor_row) == 8
    expected_tumor = [
        material("Sample Name", "S1-sample-0815-T1", "0815-T1", table_headers.SAMPLE_NAME),
        process(
            "library preparation",
            "S1-A1-library preparation-2-2",
            None,
            None,
            [table_headers.PROTOCOL_REF],
        ),
        material(
            "Library Name",
            "S1-A1-0815-T1-DNA1-COL3",
            "0815-T1-DNA1",
            table_headers.LIBRARY_NAME,
        ),
        process(
            "nucleic acid sequencing",
            "S1-A1-0815-T1-DNA1-WES1-5",
            "0815-T1-DNA1-WES1",
            "Assay Name",
            [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
        ),
        material(
            "Raw Data File",
            "S1-A1-0815-T1-DNA1-WES1_L???_???_R1.fastq.gz-COL6",
            "0815-T1-DNA1-WES1_L???_???_R1.fastq.gz",
            table_headers.RAW_DATA_FILE,
        ),
        material(
            "Raw Data File",
            "S1-A1-0815-T1-DNA1-WES1_L???_???_R2.fastq.gz-COL7",
            "0815-T1-DNA1-WES1_L???_???_R2.fastq.gz",
            table_headers.RAW_DATA_FILE,
        ),
        process(
            "Unknown",
            "S1-A1-somatic variant calling-1-8",
            "somatic variant calling-1",
            "Data Transformation Name",
            [table_headers.DATA_TRANSFORMATION_NAME],
        ),
        material(
            "Derived Data File",
            "S1-A1-0815-somatic.vcf.gz-COL9",
            "0815-somatic.vcf.gz",
            table_headers.DERIVED_DATA_FILE,
        ),
    ]
    for position, node in enumerate(expected_tumor):
        assert node == tumor_row[position]
def test_assay_row_reader_minimal_assay(minimal_investigation_file, minimal_assay_file):
    """Read the minimal assay file row by row via ``AssayRowReader``.

    The single row is expected to hold a sample, a sequencing process and
    two raw data files.
    """
    # Build the reader; the header is inspected before any row is read
    reader = AssayRowReader.from_stream("S1", "A1", minimal_assay_file)
    assert len(reader.header) == 5

    # Collect every row of the assay
    all_rows = list(reader.read())
    assert len(all_rows) == 1
    row = all_rows[0]
    assert len(row) == 4

    # Nodes expected in the row, in column order
    expected_nodes = [
        models.Material(
            "Sample Name",
            "S1-sample-0815-N1",
            "0815-N1",
            None,
            (),
            (),
            (),
            None,
            [table_headers.SAMPLE_NAME],
        ),
        models.Process(
            "nucleic acid sequencing",
            "S1-A1-0815-N1-DNA1-WES1-3",
            "0815-N1-DNA1-WES1",
            "Assay Name",
            None,
            None,
            (),
            (),
            None,
            None,
            None,
            [table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
        ),
        models.Material(
            "Raw Data File",
            "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
            "0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
            None,
            (),
            (),
            (),
            None,
            [table_headers.RAW_DATA_FILE],
        ),
        models.Material(
            "Raw Data File",
            "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
            "0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
            None,
            (),
            (),
            (),
            None,
            [table_headers.RAW_DATA_FILE],
        ),
    ]
    for position, node in enumerate(expected_nodes):
        assert node == row[position]
def test_study_reader_minimal_study(minimal_investigation_file, minimal_study_file):
    """Read the minimal study file into a ``Study`` object via ``StudyReader``.

    Unlike ``StudyRowReader``, which only yields the row-wise nodes, the
    ``StudyReader`` result exposes materials, processes and arcs.
    """
    # Parsing the investigation itself is covered by other tests
    investigation = InvestigationReader.from_stream(minimal_investigation_file).read()
    with pytest.warns(IsaWarning) as caught:
        InvestigationValidator(investigation).validate()
    assert len(caught) == 2

    # The header is inspected before the study body is read
    reader = StudyReader.from_stream("S1", minimal_study_file)
    assert len(reader.header) == 3

    # Read the study and validate it against its investigation entry
    study = reader.read()
    StudyValidator(investigation, investigation.studies[0], study).validate()

    # File name and overall sizes
    actual_path = os.path.normpath(str(study.file))
    assert actual_path.endswith(os.path.normpath("data/i_minimal/s_minimal.txt"))
    assert len(study.header) == 3
    assert len(study.materials) == 2
    assert len(study.processes) == 1
    assert len(study.arcs) == 2

    # Unique node names used as lookup keys and as arc end points
    source_id = "S1-source-0815"
    collection_id = "S1-sample collection-2-1"
    sample_id = "S1-sample-0815-N1"

    # Materials: (type, unique name, name, header)
    material_specs = [
        ("Source Name", source_id, "0815", table_headers.SOURCE_NAME),
        ("Sample Name", sample_id, "0815-N1", table_headers.SAMPLE_NAME),
    ]
    for kind, unique_name, name, header in material_specs:
        expected = models.Material(kind, unique_name, name, None, (), (), (), None, [header])
        assert expected == study.materials[unique_name]

    # The only process
    expected = models.Process(
        "sample collection",
        collection_id,
        None,
        None,
        None,
        None,
        (),
        (),
        None,
        None,
        None,
        [table_headers.PROTOCOL_REF],
    )
    assert expected == study.processes[collection_id]

    # Arcs: source -> sample collection -> sample
    expected = (
        models.Arc(source_id, collection_id),
        models.Arc(collection_id, sample_id),
    )
    assert expected == study.arcs
def test_study_reader_small_study(small_investigation_file, small_study_file):
    """Read the small study file into a ``Study`` object via ``StudyReader``.

    Checks the overall sizes, a selection of sources, samples and sample
    collection processes, and the complete tuple of arcs.
    """
    # Reading and validating the investigation share one warning context
    with pytest.warns(IsaWarning) as caught:
        investigation = InvestigationReader.from_stream(small_investigation_file).read()
        InvestigationValidator(investigation).validate()
    assert len(caught) == 2

    # The header is inspected before the study body is read
    reader = StudyReader.from_stream("S1", small_study_file)
    assert len(reader.header) == 13

    # Read the study and validate it against its investigation entry
    study = reader.read()
    StudyValidator(investigation, investigation.studies[0], study).validate()

    # File name and overall sizes
    actual_path = os.path.normpath(str(study.file))
    assert actual_path.endswith(os.path.normpath("data/i_small/s_small.txt"))
    assert len(study.header) == 13
    assert len(study.materials) == 9
    assert len(study.processes) == 5
    assert len(study.arcs) == 10

    # Column headers attached to each kind of node
    headers_source = [
        table_headers.SOURCE_NAME,
        table_headers.CHARACTERISTICS + "[organism]",
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
        table_headers.CHARACTERISTICS + "[age]",
        table_headers.UNIT,
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
    ]
    headers_collection = [
        table_headers.PROTOCOL_REF,
        table_headers.PARAMETER_VALUE + "[instrument]",
        table_headers.PERFORMER,
        table_headers.DATE,
    ]
    headers_sample = [
        table_headers.SAMPLE_NAME,
        table_headers.CHARACTERISTICS + "[status]",
        table_headers.FACTOR_VALUE + "[treatment]",
    ]

    # Unit shared by every "age" characteristic
    day_unit = models.OntologyTermRef(
        name="day",
        accession="http://purl.obolibrary.org/obo/UO_0000033",
        ontology_name="UO",
    )

    def source_traits(organism, age):
        """Build the (organism, age) characteristics pair of a source."""
        return (
            models.Characteristics(name="organism", value=[organism], unit=None),
            models.Characteristics(name="age", value=[age], unit=day_unit),
        )

    # Sources: (unique name, name, characteristics)
    source_specs = [
        (
            "S1-source-0815",
            "0815",
            source_traits(
                models.OntologyTermRef(
                    name="Mus musculus",
                    accession="http://purl.bioontology.org/ontology/NCBITAXON/10090",
                    ontology_name="NCBITAXON",
                ),
                "90",
            ),
        ),
        (
            "S1-source-0816",
            "0816",
            source_traits(models.OntologyTermRef("Mus musculus", "", ""), ""),
        ),
        (
            "S1-source-0817",
            "0817",
            source_traits(models.OntologyTermRef(None, None, None), "150"),
        ),
    ]
    for unique_name, name, characteristics in source_specs:
        expected = models.Material(
            "Source Name",
            unique_name,
            name,
            None,
            characteristics,
            (),
            (),
            None,
            headers_source,
        )
        assert expected == study.materials[unique_name]

    # Samples: (unique name, name, status value, treatment value)
    sample_specs = [
        ("S1-sample-0815-N1", "0815-N1", "0", "yes"),
        ("S1-sample-0815-T1", "0815-T1", "2", ""),
        ("S1-sample-0816-T1", "0816-T1", "1", "yes"),
        ("S1-Empty Sample Name-13-5", "", "", ""),
    ]
    for unique_name, name, status, treatment in sample_specs:
        expected = models.Material(
            "Sample Name",
            unique_name,
            name,
            None,
            (models.Characteristics("status", [status], None),),
            (),
            (models.FactorValue("treatment", treatment, None),),
            None,
            headers_sample,
        )
        assert expected == study.materials[unique_name]

    # Sample collections: (unique name, instrument values)
    collection_specs = [
        ("S1-sample collection-9-2", ["scalpel"]),
        ("S1-sample collection-9-3", ["scalpel type A", "scalpel type B"]),
        ("S1-sample collection-9-4", ["scalpel"]),
    ]
    for unique_name, instruments in collection_specs:
        expected = models.Process(
            "sample collection",
            unique_name,
            None,
            None,
            date(2018, 2, 2),
            "John Doe",
            (models.ParameterValue("instrument", instruments, None),),
            (),
            None,
            None,
            None,
            headers_collection,
        )
        assert expected == study.processes[unique_name]

    # Each (source, process, sample) triple contributes two consecutive arcs
    triples = [
        ("S1-source-0814", "S1-sample collection-9-1", "S1-sample-0814-N1"),
        ("S1-source-0815", "S1-sample collection-9-2", "S1-sample-0815-N1"),
        ("S1-source-0815", "S1-sample collection-9-3", "S1-sample-0815-T1"),
        ("S1-source-0816", "S1-sample collection-9-4", "S1-sample-0816-T1"),
        ("S1-source-0817", "S1-sample collection-9-5", "S1-Empty Sample Name-13-5"),
    ]
    arc_list = []
    for source_id, collection_id, sample_id in triples:
        arc_list.append(models.Arc(source_id, collection_id))
        arc_list.append(models.Arc(collection_id, sample_id))
    expected = tuple(arc_list)
    assert expected == study.arcs
def test_study_row_reader_small_study(small_investigation_file, small_study_file):
    """Use ``StudyRowReader`` to read in small study file.

    Checks the header count and the ``repr`` of the first two column
    headers, then compares the source of the first row and all three nodes
    of the second and third rows against expected model objects.
    """
    # Create new row reader and check read headers (+ string representation)
    row_reader = StudyRowReader.from_stream("S1", small_study_file)
    assert 13 == len(row_reader.header)
    rep0 = "ColumnHeader(column_type='Source Name', col_no=0, span=1)"
    rep1 = "LabeledColumnHeader(column_type='Characteristics', col_no=1, span=1, label='organism')"
    assert rep0 == repr(row_reader.header[0])
    assert rep1 == repr(row_reader.header[1])

    # Read all rows in study
    rows = list(row_reader.read())

    # Check results
    assert 5 == len(rows)
    first_row = rows[0]
    second_row = rows[1]
    third_row = rows[2]

    assert 3 == len(second_row)

    # Column headers attached to each kind of node
    headers_source = [
        table_headers.SOURCE_NAME,
        table_headers.CHARACTERISTICS + "[organism]",
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
        table_headers.CHARACTERISTICS + "[age]",
        table_headers.UNIT,
        table_headers.TERM_SOURCE_REF,
        table_headers.TERM_ACCESSION_NUMBER,
    ]
    headers_collection = [
        table_headers.PROTOCOL_REF,
        table_headers.PARAMETER_VALUE + "[instrument]",
        table_headers.PERFORMER,
        table_headers.DATE,
    ]
    headers_sample = [
        table_headers.SAMPLE_NAME,
        table_headers.CHARACTERISTICS + "[status]",
        table_headers.FACTOR_VALUE + "[treatment]",
    ]

    # Unit shared by the "age" characteristics below
    unit = models.OntologyTermRef(
        name="day",
        accession="http://purl.obolibrary.org/obo/UO_0000033",
        ontology_name="UO",
    )

    # First row: only the source is checked; it lists two organisms
    characteristics1 = (
        models.Characteristics(
            name="organism",
            value=[
                models.OntologyTermRef(
                    name="Mus musculus",
                    accession="http://purl.bioontology.org/ontology/NCBITAXON/10090",
                    ontology_name="NCBITAXON",
                ),
                # The name literal had been mangled to "H**o sapiens";
                # NCBITAXON 9606 is Homo sapiens.
                models.OntologyTermRef(
                    name="Homo sapiens",
                    accession="http://purl.bioontology.org/ontology/NCBITAXON/9606",
                    ontology_name="NCBITAXON",
                ),
            ],
            unit=None,
        ),
        models.Characteristics(name="age", value=["90"], unit=unit),
    )
    expected = models.Material(
        "Source Name",
        "S1-source-0814",
        "0814",
        None,
        characteristics1,
        (),
        (),
        None,
        headers_source,
    )
    assert expected == first_row[0]

    # Second row: source 0815 -> sample collection -> sample 0815-N1
    characteristics2 = (
        models.Characteristics(
            name="organism",
            value=[
                models.OntologyTermRef(
                    name="Mus musculus",
                    accession="http://purl.bioontology.org/ontology/NCBITAXON/10090",
                    ontology_name="NCBITAXON",
                )
            ],
            unit=None,
        ),
        models.Characteristics(name="age", value=["90"], unit=unit),
    )
    expected = models.Material(
        "Source Name",
        "S1-source-0815",
        "0815",
        None,
        characteristics2,
        (),
        (),
        None,
        headers_source,
    )
    assert expected == second_row[0]
    expected = models.Process(
        "sample collection",
        "S1-sample collection-9-2",
        None,
        None,
        date(2018, 2, 2),
        "John Doe",
        (models.ParameterValue("instrument", ["scalpel"], None),),
        (),
        None,
        None,
        None,
        headers_collection,
    )
    assert expected == second_row[1]
    expected = models.Material(
        "Sample Name",
        "S1-sample-0815-N1",
        "0815-N1",
        None,
        (models.Characteristics("status", ["0"], None),),
        (),
        (models.FactorValue("treatment", "yes", None),),
        None,
        headers_sample,
    )
    assert expected == second_row[2]

    # Third row: same source 0815 -> sample collection -> sample 0815-T1
    assert 3 == len(third_row)
    expected = models.Material(
        "Source Name",
        "S1-source-0815",
        "0815",
        None,
        characteristics2,
        (),
        (),
        None,
        headers_source,
    )
    assert expected == third_row[0]
    expected = models.Process(
        "sample collection",
        "S1-sample collection-9-3",
        None,
        None,
        date(2018, 2, 2),
        "John Doe",
        (models.ParameterValue("instrument", ["scalpel type A", "scalpel type B"], None),),
        (),
        None,
        None,
        None,
        headers_collection,
    )
    assert expected == third_row[1]
    expected = models.Material(
        "Sample Name",
        "S1-sample-0815-T1",
        "0815-T1",
        None,
        (models.Characteristics("status", ["2"], None),),
        (),
        (models.FactorValue("treatment", "", None),),
        None,
        headers_sample,
    )
    assert expected == third_row[2]
def create_and_write(out_path):
    """Build a minimal investigation, study and assay and write them as ISA-Tab.

    Three files are written into ``out_path``: ``i_minimal.txt``,
    ``s_minimal.txt`` and ``a_minimal.txt``. Each model is validated right
    before its file is written.

    :param out_path: directory the files are written into (it is not
        created here)
    """

    def plain_protocol(protocol_name):
        """Build a protocol whose type term carries the protocol's own name."""
        return models.ProtocolInfo(
            name=protocol_name,
            type=models.OntologyTermRef(name=protocol_name),
            description=None,
            uri=None,
            version=None,
            parameters={},
            components={},
            comments=(),
            headers=[],
        )

    def plain_material(kind, unique_name, name, header):
        """Build a material without characteristics, comments or factor values."""
        return models.Material(
            type=kind,
            unique_name=unique_name,
            name=name,
            extract_label=None,
            characteristics=(),
            comments=(),
            factor_values=(),
            material_type=None,
            headers=[header],
        )

    # ---- Investigation: study section ----

    # Basic study information
    study_basics = models.BasicInfo(
        path="s_minimal.txt",
        identifier="s_minimal",
        title="Germline Study",
        description=None,
        submission_date=None,
        public_release_date=None,
        comments=(
            models.Comment(name="Study Grant Number", value=None),
            models.Comment(name="Study Funding Agency", value=None),
        ),
        headers=[],
    )

    # One assay entry
    exome_assay = models.AssayInfo(
        measurement_type=models.OntologyTermRef(
            name="exome sequencing assay",
            accession="http://purl.obolibrary.org/obo/OBI_0002118",
            ontology_name="OBI",
        ),
        technology_type=models.OntologyTermRef(
            name="nucleotide sequencing",
            accession="http://purl.obolibrary.org/obo/OBI_0000626",
            ontology_name="OBI",
        ),
        platform=None,
        path="a_minimal.txt",
        comments=(),
        headers=[],
    )

    # Protocols referenced by the study and assay processes further down
    collection_protocol = plain_protocol("sample collection")
    sequencing_protocol = plain_protocol("nucleic acid sequencing")

    # Study entry of the investigation
    study_entry = models.StudyInfo(
        info=study_basics,
        designs=(),
        publications=(),
        factors={},
        assays=(exome_assay,),
        protocols={
            protocol.name: protocol
            for protocol in (collection_protocol, sequencing_protocol)
        },
        contacts=(),
    )

    # ---- Investigation: remaining sections ----

    # Ontology term source reference
    obi_source = models.OntologyRef(
        name="OBI",
        file="http://data.bioontology.org/ontologies/OBI",
        version="31",
        description="Ontology for Biomedical Investigations",
        comments=(),
        headers=[],
    )

    # Basic investigation information
    investigation_basics = models.BasicInfo(
        path="i_minimal.txt",
        identifier="i_minimal",
        title="Minimal Investigation",
        description=None,
        submission_date=None,
        public_release_date=None,
        comments=(),
        headers=[],
    )

    investigation = models.InvestigationInfo(
        ontology_source_refs={obi_source.name: obi_source},
        info=investigation_basics,
        publications=(),
        contacts=(),
        studies=(study_entry,),
    )

    # Validate, then write the investigation as an ISA-Tab text file
    InvestigationValidator(investigation).validate()
    investigation_target = join(out_path, investigation.info.path)
    with open(investigation_target, "wt", newline="") as handle:
        InvestigationWriter.from_stream(
            investigation=investigation, output_file=handle
        ).write()

    # ---- Study graph ----
    # At least one source, one sample and one collection process.
    # Unique names are required for unambiguous node identification.
    study_source = plain_material(
        "Source Name", "S1-source-0815", "0815", table_headers.SOURCE_NAME
    )
    study_sample = plain_material(
        "Sample Name", "S1-sample-0815-N1", "0815-N1", table_headers.SAMPLE_NAME
    )
    collection = models.Process(
        protocol_ref="sample collection",
        unique_name="S1-sample collection-2-1",
        name=None,
        name_type=None,
        date=None,
        performer=None,
        parameter_values=(),
        comments=(),
        array_design_ref=None,
        first_dimension=None,
        second_dimension=None,
        headers=[table_headers.PROTOCOL_REF],
    )

    # Arcs connect material and process nodes by their unique names
    study_arcs = (
        models.Arc(tail="S1-source-0815", head="S1-sample collection-2-1"),
        models.Arc(tail="S1-sample collection-2-1", head="S1-sample-0815-N1"),
    )

    study_graph = models.Study(
        file=investigation.studies[0].info.path,
        header=None,
        materials={
            study_source.unique_name: study_source,
            study_sample.unique_name: study_sample,
        },
        processes={collection.unique_name: collection},
        arcs=study_arcs,
    )

    # Validate, then write the study as an ISA-Tab text file
    StudyValidator(
        investigation=investigation,
        study_info=investigation.studies[0],
        study=study_graph,
    ).validate()
    study_target = join(out_path, investigation.studies[0].info.path)
    with open(study_target, "wt", newline="") as handle:
        StudyWriter.from_stream(study_or_assay=study_graph, output_file=handle).write()

    # ---- Assay graph ----
    # At least one sample, one output material and one process.
    # Unique names are required for unambiguous node identification.
    # Explicit header definition per node is currently required to enable
    # export to ISA-Tab.
    assay_sample = plain_material(
        "Sample Name", "S1-sample-0815-N1", "0815-N1", table_headers.SAMPLE_NAME
    )
    first_read_file = plain_material(
        "Raw Data File",
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
        "0815-N1-DNA1-WES1_L???_???_R1.fastq.gz",
        table_headers.RAW_DATA_FILE,
    )
    second_read_file = plain_material(
        "Raw Data File",
        "S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
        "0815-N1-DNA1-WES1_L???_???_R2.fastq.gz",
        table_headers.RAW_DATA_FILE,
    )
    sequencing = models.Process(
        protocol_ref="nucleic acid sequencing",
        unique_name="S1-A1-0815-N1-DNA1-WES1-3",
        name="0815-N1-DNA1-WES1",
        name_type="Assay Name",
        date=None,
        performer=None,
        parameter_values=(),
        comments=(),
        array_design_ref=None,
        first_dimension=None,
        second_dimension=None,
        headers=[table_headers.PROTOCOL_REF, table_headers.ASSAY_NAME],
    )

    # Arcs connect material and process nodes by their unique names
    assay_arcs = (
        models.Arc(tail="S1-sample-0815-N1", head="S1-A1-0815-N1-DNA1-WES1-3"),
        models.Arc(
            tail="S1-A1-0815-N1-DNA1-WES1-3",
            head="S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
        ),
        models.Arc(
            tail="S1-A1-0815-N1-DNA1-WES1_L???_???_R1.fastq.gz-COL4",
            head="S1-A1-0815-N1-DNA1-WES1_L???_???_R2.fastq.gz-COL5",
        ),
    )

    assay_graph = models.Assay(
        file=investigation.studies[0].assays[0].path,
        header=None,
        materials={
            assay_sample.unique_name: assay_sample,
            first_read_file.unique_name: first_read_file,
            second_read_file.unique_name: second_read_file,
        },
        processes={sequencing.unique_name: sequencing},
        arcs=assay_arcs,
    )

    # Validate, then write the assay as an ISA-Tab text file
    AssayValidator(
        investigation=investigation,
        study_info=investigation.studies[0],
        assay_info=investigation.studies[0].assays[0],
        assay=assay_graph,
    ).validate()
    assay_target = join(out_path, investigation.studies[0].assays[0].path)
    with open(assay_target, "wt", newline="") as handle:
        AssayWriter.from_stream(study_or_assay=assay_graph, output_file=handle).write()