def test_wrong_file_name_case(self):
    """Check that the pipeline raises DataError for badly named uploads.

    A genomic set CSV whose name ends in 'wrong-name.csv' is written and the
    pipeline is run; then a manifest result CSV with the same suffix is
    written to the fake biobank bucket / result folder and the pipeline is
    run again. Each run must raise DataError.
    """
    time_format = genomic_set_file_handler.INPUT_CSV_TIME_FORMAT

    # Genomic set upload with a malformed name.
    set_csv = test_data.open_genomic_set_file('Genomic-Test-Set-test-3.csv')
    set_central_now = self._naive_utc_to_naive_central(clock.CLOCK.now())
    set_name = 'Genomic-Test-Set-v1%swrong-name.csv' % (
        set_central_now.strftime(time_format))
    self._write_cloud_csv(set_name, set_csv)
    self.assertRaises(DataError, genomic_pipeline.process_genomic_water_line)

    # Manifest result upload with a malformed name.
    # NOTE(review): nothing in this test removes the first file before the
    # second run -- confirm the second DataError is really triggered by the
    # manifest result file name and not by the earlier upload.
    result_csv = test_data.open_genomic_set_file(
        'Genomic-Manifest-Result-test.csv')
    result_central_now = self._naive_utc_to_naive_central(clock.CLOCK.now())
    result_name = 'Genomic-Manifest-Result-AoU-1-v1%swrong-name.csv' % (
        result_central_now.strftime(time_format))
    self._write_cloud_csv(
        result_name,
        result_csv,
        bucket=_FAKE_BIOBANK_SAMPLE_BUCKET,
        folder=_FAKE_BUCKET_RESULT_FOLDER)
    self.assertRaises(DataError, genomic_pipeline.process_genomic_water_line)
def test_over_24hours_genomic_set_file_case(self):
    """Upload a set file stamped 25 hours in the past and run the pipeline.

    The file name carries a timestamp 25 hours before the current clock
    time. After the pipeline run, no genomic set members may exist.
    """
    csv_contents = test_data.open_genomic_set_file(
        'Genomic-Test-Set-test-3.csv')

    stale_utc = clock.CLOCK.now() - datetime.timedelta(hours=25)
    stale_central = self._naive_utc_to_naive_central(stale_utc)
    stamp = stale_central.strftime(
        genomic_set_file_handler.INPUT_CSV_TIME_FORMAT)
    self._write_cloud_csv('Genomic-Test-Set-v1%s.csv' % stamp, csv_contents)

    genomic_pipeline.process_genomic_water_line()

    stored_members = GenomicSetMemberDao().get_all()
    self.assertEqual(len(stored_members), 0)
def test_end_to_end_invalid_case(self):
    """Run the pipeline on four members and expect four 'invalid' rows.

    Four participants are created, each with a summary and one biobank
    order. The first three pass one override to ``_make_summary`` (date of
    birth, consent time, empty zip code); the fourth uses the defaults but
    a different biobank order identifier system. After uploading
    'Genomic-Test-Set-test-3.csv' and running the pipeline, the latest
    'Validation-Result' CSV must contain every expected column and four
    rows whose status is 'invalid' with the expected reason per row.
    """
    pmi_ops_system = u'https://www.pmi-ops.org'
    # (summary overrides, identifier system, identifier value), in the order
    # the participants must be created.
    participant_specs = (
        ({'dateOfBirth': '2018-02-14'}, pmi_ops_system, u'12345678'),
        ({'consentForStudyEnrollmentTime': datetime.datetime(1990, 1, 1)},
         pmi_ops_system, u'12345679'),
        ({'zipCode': ''}, pmi_ops_system, u'12345680'),
        ({}, u'c', u'e'),
    )
    for summary_overrides, system, value in participant_specs:
        participant = self._make_participant()
        self._make_summary(participant, **summary_overrides)
        self._make_biobank_order(
            participantId=participant.participantId,
            biobankOrderId=participant.participantId,
            identifiers=[BiobankOrderIdentifier(system=system, value=value)])

    samples_file = test_data.open_genomic_set_file(
        'Genomic-Test-Set-test-3.csv')
    timestamp = self._naive_utc_to_naive_central(clock.CLOCK.now()).strftime(
        genomic_set_file_handler.INPUT_CSV_TIME_FORMAT)
    self._write_cloud_csv('Genomic-Test-Set-v1%s.csv' % timestamp, samples_file)

    genomic_pipeline.process_genomic_water_line()

    class ResultCsvColumns(object):
        """Names of CSV columns checked in the validation result file."""

        GENOMIC_SET_NAME = 'genomic_set_name'
        GENOMIC_SET_CRITERIA = 'genomic_set_criteria'
        PID = 'pid'
        BIOBANK_ORDER_ID = 'biobank_order_id'
        NY_FLAG = 'ny_flag'
        SEX_AT_BIRTH = 'sex_at_birth'
        GENOME_TYPE = 'genome_type'
        STATUS = 'status'
        INVALID_REASON = 'invalid_reason'

        ALL = (GENOMIC_SET_NAME, GENOMIC_SET_CRITERIA, PID, BIOBANK_ORDER_ID,
               NY_FLAG, SEX_AT_BIRTH, GENOME_TYPE, STATUS, INVALID_REASON)

    # verify result file
    bucket_name = config.getSetting(config.GENOMIC_SET_BUCKET_NAME)
    path = self._find_latest_genomic_set_csv(bucket_name, 'Validation-Result')
    csv_file = cloudstorage_api.open(path)
    try:
        # Read everything up front so the handle can be released before the
        # assertions run (a failing assertion must not leak it).
        csv_reader = csv.DictReader(csv_file, delimiter=',')
        fieldnames = csv_reader.fieldnames or []
        rows = list(csv_reader)
    finally:
        csv_file.close()

    # Comparing the set itself makes a failure name the missing columns.
    missing_cols = set(ResultCsvColumns.ALL) - set(fieldnames)
    self.assertEqual(missing_cols, set())
    self.assertEqual(len(rows), 4)

    checked_columns = (
        ResultCsvColumns.GENOMIC_SET_NAME,
        ResultCsvColumns.GENOMIC_SET_CRITERIA,
        ResultCsvColumns.STATUS,
        ResultCsvColumns.INVALID_REASON,
        ResultCsvColumns.PID,
        ResultCsvColumns.BIOBANK_ORDER_ID,
        ResultCsvColumns.NY_FLAG,
        ResultCsvColumns.GENOME_TYPE,
        ResultCsvColumns.SEX_AT_BIRTH,
    )
    # One tuple per result row, values aligned with checked_columns.
    expected_rows = (
        ('name_xxx', 'criteria_xxx', 'invalid', 'INVALID_AGE',
         '1', '1', 'Y', 'aou_wgs', 'M'),
        ('name_xxx', 'criteria_xxx', 'invalid', 'INVALID_CONSENT',
         '2', '2', 'N', 'aou_array', 'F'),
        ('name_xxx', 'criteria_xxx', 'invalid', 'INVALID_NY_ZIPCODE',
         '3', '3', 'N', 'aou_array', 'M'),
        ('name_xxx', 'criteria_xxx', 'invalid',
         'INVALID_BIOBANK_ORDER_CLIENT_ID', '4', '4', 'Y', 'aou_wgs', 'F'),
    )
    for index, expected_values in enumerate(expected_rows):
        row = rows[index]
        for column, expected in zip(checked_columns, expected_values):
            self.assertEqual(
                row[column], expected,
                'row %d, column %s' % (index, column))
def test_end_to_end_valid_case(self):
    """Run the pipeline on three default participants and check all outputs.

    Three participants are created with default summaries and one biobank
    order each. A genomic set CSV ('Genomic-Test-Set-test-2.csv') and a
    manifest result CSV are uploaded, the pipeline is run, and then the
    test verifies:

    * the latest 'Validation-Result' CSV has three rows with status 'valid'
      and an empty invalid reason,
    * the latest 'Manifest' CSV in the biobank samples bucket lists the
      three members,
    * the latest 'Manifest-Result' CSV lists them with their package ids,
    * every stored genomic set member has one of those package ids.
    """
    for identifier_value in (u'12345678', u'12345679', u'12345680'):
        participant = self._make_participant()
        self._make_summary(participant)
        self._make_biobank_order(
            participantId=participant.participantId,
            biobankOrderId=participant.participantId,
            identifiers=[
                BiobankOrderIdentifier(system=u'https://www.pmi-ops.org',
                                       value=identifier_value)
            ])

    time_format = genomic_set_file_handler.INPUT_CSV_TIME_FORMAT

    samples_file = test_data.open_genomic_set_file(
        'Genomic-Test-Set-test-2.csv')
    input_filename = 'Genomic-Test-Set-v1%s.csv' % (
        self._naive_utc_to_naive_central(clock.CLOCK.now())
        .strftime(time_format))
    self._write_cloud_csv(input_filename, samples_file)

    manifest_result_file = test_data.open_genomic_set_file(
        'Genomic-Manifest-Result-test.csv')
    manifest_result_filename = 'Genomic-Manifest-Result-AoU-1-v1%s.csv' % (
        self._naive_utc_to_naive_central(clock.CLOCK.now())
        .strftime(time_format))
    self._write_cloud_csv(manifest_result_filename, manifest_result_file,
                          bucket=_FAKE_BIOBANK_SAMPLE_BUCKET,
                          folder=_FAKE_BUCKET_RESULT_FOLDER)

    genomic_pipeline.process_genomic_water_line()

    def read_latest_csv(bucket, name_part):
        """Return (fieldnames, rows) of the latest matching CSV, closing it."""
        path = self._find_latest_genomic_set_csv(bucket, name_part)
        csv_file = cloudstorage_api.open(path)
        try:
            reader = csv.DictReader(csv_file, delimiter=',')
            return reader.fieldnames or [], list(reader)
        finally:
            csv_file.close()

    def assert_rows(rows, columns, expected_rows, label):
        """Compare rows[i][column] to the expected values, in order."""
        for index, expected_values in enumerate(expected_rows):
            row = rows[index]
            for column, expected in zip(columns, expected_values):
                self.assertEqual(
                    row[column], expected,
                    '%s: row %d, column %s' % (label, index, column))

    class ResultCsvColumns(object):
        """Names of CSV columns checked in the validation result file."""

        GENOMIC_SET_NAME = 'genomic_set_name'
        GENOMIC_SET_CRITERIA = 'genomic_set_criteria'
        PID = 'pid'
        BIOBANK_ORDER_ID = 'biobank_order_id'
        NY_FLAG = 'ny_flag'
        SEX_AT_BIRTH = 'sex_at_birth'
        GENOME_TYPE = 'genome_type'
        STATUS = 'status'
        INVALID_REASON = 'invalid_reason'

        ALL = (GENOMIC_SET_NAME, GENOMIC_SET_CRITERIA, PID, BIOBANK_ORDER_ID,
               NY_FLAG, SEX_AT_BIRTH, GENOME_TYPE, STATUS, INVALID_REASON)

    class ExpectedCsvColumns(object):
        """Names of CSV columns checked in the manifest files."""

        VALUE = 'value'
        BIOBANK_ID = 'biobank_id'
        SEX_AT_BIRTH = 'sex_at_birth'
        GENOME_TYPE = 'genome_type'
        NY_FLAG = 'ny_flag'
        REQUEST_ID = 'request_id'
        PACKAGE_ID = 'package_id'

        # BIOBANK_ID is asserted on below, so it belongs in the
        # required-column check as well.
        ALL = (VALUE, BIOBANK_ID, SEX_AT_BIRTH, GENOME_TYPE, NY_FLAG,
               REQUEST_ID, PACKAGE_ID)

    # verify result file
    fieldnames, rows = read_latest_csv(
        config.getSetting(config.GENOMIC_SET_BUCKET_NAME), 'Validation-Result')
    self.assertEqual(set(ResultCsvColumns.ALL) - set(fieldnames), set())
    self.assertEqual(len(rows), 3)
    assert_rows(
        rows,
        (ResultCsvColumns.GENOMIC_SET_NAME,
         ResultCsvColumns.GENOMIC_SET_CRITERIA,
         ResultCsvColumns.STATUS,
         ResultCsvColumns.INVALID_REASON,
         ResultCsvColumns.PID,
         ResultCsvColumns.BIOBANK_ORDER_ID,
         ResultCsvColumns.NY_FLAG,
         ResultCsvColumns.GENOME_TYPE,
         ResultCsvColumns.SEX_AT_BIRTH),
        (('name_xxx', 'criteria_xxx', 'valid', '',
          '1', '1', 'Y', 'aou_wgs', 'M'),
         ('name_xxx', 'criteria_xxx', 'valid', '',
          '2', '2', 'N', 'aou_array', 'F'),
         ('name_xxx', 'criteria_xxx', 'valid', '',
          '3', '3', 'N', 'aou_array', 'M')),
        'Validation-Result')

    biobank_bucket = config.getSetting(config.BIOBANK_SAMPLES_BUCKET_NAME)
    manifest_columns = (
        ExpectedCsvColumns.VALUE,
        ExpectedCsvColumns.BIOBANK_ID,
        ExpectedCsvColumns.SEX_AT_BIRTH,
        ExpectedCsvColumns.GENOME_TYPE,
        ExpectedCsvColumns.NY_FLAG,
    )
    manifest_expected = (
        ('12345678', '1', 'M', 'aou_wgs', 'Y'),
        ('12345679', '2', 'F', 'aou_array', 'N'),
        ('12345680', '3', 'M', 'aou_array', 'N'),
    )
    package_ids = ['PKG-XXXX-XXXX1', 'PKG-XXXX-XXXX2', 'PKG-XXXX-XXXX3']

    # verify manifest files
    fieldnames, rows = read_latest_csv(biobank_bucket, 'Manifest')
    self.assertEqual(set(ExpectedCsvColumns.ALL) - set(fieldnames), set())
    assert_rows(rows, manifest_columns, manifest_expected, 'Manifest')

    # verify manifest result files: same values plus the package id per row
    fieldnames, rows = read_latest_csv(biobank_bucket, 'Manifest-Result')
    self.assertEqual(set(ExpectedCsvColumns.ALL) - set(fieldnames), set())
    assert_rows(
        rows,
        manifest_columns + (ExpectedCsvColumns.PACKAGE_ID,),
        tuple(values + (package_id,)
              for values, package_id in zip(manifest_expected, package_ids)),
        'Manifest-Result')

    # verify package id in database
    # NOTE(review): this loop passes vacuously if get_all() returns nothing;
    # consider also asserting the expected member count once confirmed.
    for member in GenomicSetMemberDao().get_all():
        self.assertIn(member.packageId, package_ids)