示例#1
0
    def test_wrong_file_name_case(self):
        """Expect DataError when input files carry a 'wrong-name' suffix.

        Two rounds are exercised: first a genomic set CSV, then a manifest
        result CSV written to the fake biobank sample bucket / result
        folder.  After each write ``process_genomic_water_line`` must raise
        ``DataError``.
        """
        time_format = genomic_set_file_handler.INPUT_CSV_TIME_FORMAT

        # Round 1: genomic set file with an unexpected name.
        genomic_set_csv = test_data.open_genomic_set_file(
            'Genomic-Test-Set-test-3.csv')
        central_now = self._naive_utc_to_naive_central(clock.CLOCK.now())
        bad_set_name = 'Genomic-Test-Set-v1%swrong-name.csv' % (
            central_now.strftime(time_format),)
        self._write_cloud_csv(bad_set_name, genomic_set_csv)

        with self.assertRaises(DataError):
            genomic_pipeline.process_genomic_water_line()

        # Round 2: manifest result file with an unexpected name.
        # NOTE(review): the file from round 1 is not removed here, so this
        # second failure may be triggered by it rather than by the manifest
        # result file -- confirm against the pipeline.
        manifest_result_csv = test_data.open_genomic_set_file(
            'Genomic-Manifest-Result-test.csv')
        # The clock is read again on purpose: it is sampled after the first
        # pipeline run, exactly where the file name is built.
        central_now = self._naive_utc_to_naive_central(clock.CLOCK.now())
        bad_result_name = (
            'Genomic-Manifest-Result-AoU-1-v1%swrong-name.csv' % (
                central_now.strftime(time_format),))
        self._write_cloud_csv(bad_result_name,
                              manifest_result_csv,
                              bucket=_FAKE_BIOBANK_SAMPLE_BUCKET,
                              folder=_FAKE_BUCKET_RESULT_FOLDER)

        with self.assertRaises(DataError):
            genomic_pipeline.process_genomic_water_line()
示例#2
0
    def test_over_24hours_genomic_set_file_case(self):
        """A genomic set file stamped 25 hours ago must produce no members.

        The file name is built from a timestamp 25 hours before the current
        clock value; after the pipeline runs, ``GenomicSetMemberDao`` is
        expected to return an empty collection.
        """
        genomic_set_csv = test_data.open_genomic_set_file(
            'Genomic-Test-Set-test-3.csv')

        stale_utc = clock.CLOCK.now() - datetime.timedelta(hours=25)
        stale_central = self._naive_utc_to_naive_central(stale_utc)
        stale_name = 'Genomic-Test-Set-v1%s.csv' % (
            stale_central.strftime(
                genomic_set_file_handler.INPUT_CSV_TIME_FORMAT),)

        self._write_cloud_csv(stale_name, genomic_set_csv)

        genomic_pipeline.process_genomic_water_line()

        stored_members = GenomicSetMemberDao().get_all()
        self.assertEqual(len(stored_members), 0)
示例#3
0
    def test_end_to_end_invalid_case(self):
        """Run the pipeline on four participants and expect four invalid rows.

        Setup creates four participants, each with a summary and a biobank
        order.  Three summaries override one field (date of birth, consent
        time, empty zip code); the fourth keeps the default summary but its
        order identifier uses system ``u'c'`` instead of the pmi-ops URL.

        After the genomic set file is written and the pipeline runs, the
        'Validation-Result' CSV must have all expected columns and exactly
        four rows, each with status ``invalid`` and the ``invalid_reason``
        listed in ``expected_rows`` below.
        """
        pmi_system = u'https://www.pmi-ops.org'
        # (summary overrides, identifier system, identifier value), in the
        # order the participants are created.
        participant_setups = (
            ({'dateOfBirth': '2018-02-14'}, pmi_system, u'12345678'),
            ({'consentForStudyEnrollmentTime': datetime.datetime(1990, 1, 1)},
             pmi_system, u'12345679'),
            ({'zipCode': ''}, pmi_system, u'12345680'),
            ({}, u'c', u'e'),
        )
        for summary_overrides, system, value in participant_setups:
            participant = self._make_participant()
            self._make_summary(participant, **summary_overrides)
            self._make_biobank_order(
                participantId=participant.participantId,
                biobankOrderId=participant.participantId,
                identifiers=[
                    BiobankOrderIdentifier(system=system, value=value)
                ])

        samples_file = test_data.open_genomic_set_file(
            'Genomic-Test-Set-test-3.csv')

        timestamp = self._naive_utc_to_naive_central(
            clock.CLOCK.now()).strftime(
                genomic_set_file_handler.INPUT_CSV_TIME_FORMAT)
        input_filename = 'Genomic-Test-Set-v1%s.csv' % timestamp

        self._write_cloud_csv(input_filename, samples_file)

        genomic_pipeline.process_genomic_water_line()

        # verify result file
        bucket_name = config.getSetting(config.GENOMIC_SET_BUCKET_NAME)
        path = self._find_latest_genomic_set_csv(bucket_name,
                                                 'Validation-Result')
        # Read header and rows inside the ``with`` so the cloud file handle
        # is closed even if parsing fails.
        with cloudstorage_api.open(path) as csv_file:
            csv_reader = csv.DictReader(csv_file, delimiter=',')
            fieldnames = csv_reader.fieldnames
            rows = list(csv_reader)

        # Columns the validation result file must contain.
        columns = ('genomic_set_name', 'genomic_set_criteria', 'pid',
                   'biobank_order_id', 'ny_flag', 'sex_at_birth',
                   'genome_type', 'status', 'invalid_reason')
        # Expected cell values, one tuple per row, ordered like ``columns``.
        expected_rows = (
            ('name_xxx', 'criteria_xxx', '1', '1', 'Y', 'M', 'aou_wgs',
             'invalid', 'INVALID_AGE'),
            ('name_xxx', 'criteria_xxx', '2', '2', 'N', 'F', 'aou_array',
             'invalid', 'INVALID_CONSENT'),
            ('name_xxx', 'criteria_xxx', '3', '3', 'N', 'M', 'aou_array',
             'invalid', 'INVALID_NY_ZIPCODE'),
            ('name_xxx', 'criteria_xxx', '4', '4', 'Y', 'F', 'aou_wgs',
             'invalid', 'INVALID_BIOBANK_ORDER_CLIENT_ID'),
        )

        # Comparing against the empty set makes a failure name the columns
        # that are missing.
        missing_cols = set(columns) - set(fieldnames)
        self.assertEqual(missing_cols, set())
        self.assertEqual(len(rows), 4)

        # One dict comparison per row: a mismatch reports every differing
        # column together with the row's pid.
        for row, expected_values in zip(rows, expected_rows):
            actual = {column: row[column] for column in columns}
            self.assertEqual(actual, dict(zip(columns, expected_values)))
示例#4
0
    def test_end_to_end_valid_case(self):
        """Run the pipeline on three participants and check every output.

        Setup creates three participants with default summaries and
        pmi-ops biobank order identifiers, writes a genomic set file and a
        manifest result file, then runs ``process_genomic_water_line``.

        Verified afterwards:
          * the 'Validation-Result' CSV has exactly three ``valid`` rows;
          * the 'Manifest' CSV carries the expected per-sample values;
          * the 'Manifest-Result' CSV additionally carries package ids;
          * every ``GenomicSetMember`` has one of the expected package ids.
        """

        def read_csv(bucket, name_fragment):
            """Return (fieldnames, rows) of the CSV that
            ``_find_latest_genomic_set_csv`` locates in ``bucket``."""
            path = self._find_latest_genomic_set_csv(bucket, name_fragment)
            # The ``with`` closes the cloud file handle once the header and
            # all rows have been read.
            with cloudstorage_api.open(path) as csv_file:
                reader = csv.DictReader(csv_file, delimiter=',')
                return reader.fieldnames, list(reader)

        def check_rows(rows, columns, expected_rows):
            """Compare the leading rows with ``expected_rows`` on
            ``columns``; each expected tuple is ordered like ``columns``."""
            # Guard first so that zip() below cannot silently skip
            # expected rows when the file is too short.
            self.assertGreaterEqual(len(rows), len(expected_rows))
            for row, expected_values in zip(rows, expected_rows):
                actual = {column: row[column] for column in columns}
                self.assertEqual(actual, dict(zip(columns, expected_values)))

        for identifier_value in (u'12345678', u'12345679', u'12345680'):
            participant = self._make_participant()
            self._make_summary(participant)
            self._make_biobank_order(
                participantId=participant.participantId,
                biobankOrderId=participant.participantId,
                identifiers=[
                    BiobankOrderIdentifier(
                        system=u'https://www.pmi-ops.org',
                        value=identifier_value)
                ])

        time_format = genomic_set_file_handler.INPUT_CSV_TIME_FORMAT

        samples_file = test_data.open_genomic_set_file(
            'Genomic-Test-Set-test-2.csv')
        input_filename = 'Genomic-Test-Set-v1%s.csv' % (
            self._naive_utc_to_naive_central(
                clock.CLOCK.now()).strftime(time_format),)
        self._write_cloud_csv(input_filename, samples_file)

        manifest_result_file = test_data.open_genomic_set_file(
            'Genomic-Manifest-Result-test.csv')
        manifest_result_filename = (
            'Genomic-Manifest-Result-AoU-1-v1%s.csv' % (
                self._naive_utc_to_naive_central(
                    clock.CLOCK.now()).strftime(time_format),))
        self._write_cloud_csv(manifest_result_filename,
                              manifest_result_file,
                              bucket=_FAKE_BIOBANK_SAMPLE_BUCKET,
                              folder=_FAKE_BUCKET_RESULT_FOLDER)

        genomic_pipeline.process_genomic_water_line()

        # verify result file
        result_columns = ('genomic_set_name', 'genomic_set_criteria', 'pid',
                          'biobank_order_id', 'ny_flag', 'sex_at_birth',
                          'genome_type', 'status', 'invalid_reason')
        fieldnames, rows = read_csv(
            config.getSetting(config.GENOMIC_SET_BUCKET_NAME),
            'Validation-Result')
        # Comparing against the empty set makes a failure name the columns
        # that are missing.
        self.assertEqual(set(result_columns) - set(fieldnames), set())
        self.assertEqual(len(rows), 3)
        check_rows(rows, result_columns, (
            ('name_xxx', 'criteria_xxx', '1', '1', 'Y', 'M', 'aou_wgs',
             'valid', ''),
            ('name_xxx', 'criteria_xxx', '2', '2', 'N', 'F', 'aou_array',
             'valid', ''),
            ('name_xxx', 'criteria_xxx', '3', '3', 'N', 'M', 'aou_array',
             'valid', ''),
        ))

        # Both manifest files live in the biobank samples bucket and share
        # one header.  biobank_id is asserted on below, so it belongs to
        # the required header set as well.
        biobank_bucket = config.getSetting(config.BIOBANK_SAMPLES_BUCKET_NAME)
        required_manifest_columns = ('value', 'biobank_id', 'sex_at_birth',
                                     'genome_type', 'ny_flag', 'request_id',
                                     'package_id')
        sample_columns = ('value', 'biobank_id', 'sex_at_birth',
                          'genome_type', 'ny_flag')
        expected_samples = (
            ('12345678', '1', 'M', 'aou_wgs', 'Y'),
            ('12345679', '2', 'F', 'aou_array', 'N'),
            ('12345680', '3', 'M', 'aou_array', 'N'),
        )
        expected_package_ids = [
            'PKG-XXXX-XXXX1', 'PKG-XXXX-XXXX2', 'PKG-XXXX-XXXX3']

        # verify manifest files
        fieldnames, rows = read_csv(biobank_bucket, 'Manifest')
        self.assertEqual(
            set(required_manifest_columns) - set(fieldnames), set())
        check_rows(rows, sample_columns, expected_samples)

        # verify manifest result files
        fieldnames, rows = read_csv(biobank_bucket, 'Manifest-Result')
        self.assertEqual(
            set(required_manifest_columns) - set(fieldnames), set())
        check_rows(
            rows,
            sample_columns + ('package_id',),
            tuple(sample + (package_id,)
                  for sample, package_id in zip(expected_samples,
                                                expected_package_ids)))

        # verify package id in database
        for member in GenomicSetMemberDao().get_all():
            self.assertIn(member.packageId, expected_package_ids)