Exemplo n.º 1
0
    def test_wrong_file_name_case(self):
        samples_file = test_data.open_genomic_set_file(
            'Genomic-Test-Set-test-3.csv')

        input_filename = 'Genomic-Test-Set-v1%swrong-name.csv' % self \
          ._naive_utc_to_naive_central(clock.CLOCK.now()) \
          .strftime(genomic_set_file_handler.INPUT_CSV_TIME_FORMAT)

        self._write_cloud_csv(input_filename, samples_file)

        with self.assertRaises(DataError):
            genomic_pipeline.process_genomic_water_line()

        manifest_result_file = test_data.open_genomic_set_file(
            'Genomic-Manifest-Result-test.csv')

        manifest_result_filename = 'Genomic-Manifest-Result-AoU-1-v1%swrong-name.csv' % self \
          ._naive_utc_to_naive_central(clock.CLOCK.now()) \
          .strftime(genomic_set_file_handler.INPUT_CSV_TIME_FORMAT)

        self._write_cloud_csv(manifest_result_filename,
                              manifest_result_file,
                              bucket=_FAKE_BIOBANK_SAMPLE_BUCKET,
                              folder=_FAKE_BUCKET_RESULT_FOLDER)

        with self.assertRaises(DataError):
            genomic_pipeline.process_genomic_water_line()
  def test_read_from_csv_file(self):
    participant = self.participant_dao.insert(Participant(participantId=123, biobankId=1234))
    self.summary_dao.insert(self.participant_summary(participant))
    bo = self._make_biobank_order(participantId=participant.participantId, biobankOrderId='123',
                                  identifiers=[BiobankOrderIdentifier(
                                    system=u'https://www.pmi-ops.org', value=u'12345678')])
    BiobankOrderDao().insert(bo)

    participant2 = self.participant_dao.insert(Participant(participantId=124, biobankId=1235))
    self.summary_dao.insert(self.participant_summary(participant2))
    bo2 = self._make_biobank_order(participantId=participant2.participantId, biobankOrderId='124',
                                   identifiers=[BiobankOrderIdentifier(
                                     system=u'https://www.pmi-ops.org', value=u'12345679')])
    BiobankOrderDao().insert(bo2)

    participant3 = self.participant_dao.insert(Participant(participantId=125, biobankId=1236))
    self.summary_dao.insert(self.participant_summary(participant3))
    bo3 = self._make_biobank_order(participantId=participant3.participantId, biobankOrderId='125',
                                   identifiers=[BiobankOrderIdentifier(
                                     system=u'https://www.pmi-ops.org', value=u'12345680')])
    BiobankOrderDao().insert(bo3)

    samples_file = test_data.open_genomic_set_file('Genomic-Test-Set-test-1.csv')

    input_filename = 'cloud%s.csv' % self._naive_utc_to_naive_central(clock.CLOCK.now()).strftime(
        genomic_set_file_handler.INPUT_CSV_TIME_FORMAT)

    self._write_cloud_csv(input_filename, samples_file)
    genomic_set_file_handler.read_genomic_set_from_bucket()
    set_dao = GenomicSetDao()
    obj = set_dao.get_all()[0]

    self.assertEqual(obj.genomicSetName, 'name_xxx')
    self.assertEqual(obj.genomicSetCriteria, 'criteria_xxx')
    self.assertEqual(obj.genomicSetVersion, 1)

    member_dao = GenomicSetMemberDao()
    items = member_dao.get_all()
    for item in items:
      self.assertIn(item.participantId, [123, 124, 125])
      self.assertIn(item.biobankOrderId, ['123', '124', '125'])
      self.assertIn(item.biobankId, ['1234', '1235', '1236'])
      self.assertIn(item.biobankOrderClientId, ['12345678', '12345679', '12345680'])
      self.assertEqual(item.genomicSetId, 1)
      self.assertIn(item.genomeType, ['aou_wgs', 'aou_array'])
      self.assertIn(item.nyFlag, [0, 1])
      self.assertIn(item.sexAtBirth, ['F', 'M'])
Exemplo n.º 3
0
    def test_over_24hours_genomic_set_file_case(self):
        samples_file = test_data.open_genomic_set_file(
            'Genomic-Test-Set-test-3.csv')

        over_24hours_time = clock.CLOCK.now() - datetime.timedelta(hours=25)

        input_filename = 'Genomic-Test-Set-v1%s.csv' % self \
          ._naive_utc_to_naive_central(over_24hours_time) \
          .strftime(genomic_set_file_handler.INPUT_CSV_TIME_FORMAT)

        self._write_cloud_csv(input_filename, samples_file)

        genomic_pipeline.process_genomic_water_line()

        member_dao = GenomicSetMemberDao()
        members = member_dao.get_all()
        self.assertEqual(len(members), 0)
Exemplo n.º 4
0
    def test_end_to_end_invalid_case(self):
        participant = self._make_participant()
        self._make_summary(participant, dateOfBirth='2018-02-14')
        self._make_biobank_order(participantId=participant.participantId,
                                 biobankOrderId=participant.participantId,
                                 identifiers=[
                                     BiobankOrderIdentifier(
                                         system=u'https://www.pmi-ops.org',
                                         value=u'12345678')
                                 ])

        participant2 = self._make_participant()
        self._make_summary(participant2,
                           consentForStudyEnrollmentTime=datetime.datetime(
                               1990, 1, 1))
        self._make_biobank_order(participantId=participant2.participantId,
                                 biobankOrderId=participant2.participantId,
                                 identifiers=[
                                     BiobankOrderIdentifier(
                                         system=u'https://www.pmi-ops.org',
                                         value=u'12345679')
                                 ])

        participant3 = self._make_participant()
        self._make_summary(participant3, zipCode='')
        self._make_biobank_order(participantId=participant3.participantId,
                                 biobankOrderId=participant3.participantId,
                                 identifiers=[
                                     BiobankOrderIdentifier(
                                         system=u'https://www.pmi-ops.org',
                                         value=u'12345680')
                                 ])

        participant4 = self._make_participant()
        self._make_summary(participant4)
        self._make_biobank_order(
            participantId=participant4.participantId,
            biobankOrderId=participant4.participantId,
            identifiers=[BiobankOrderIdentifier(system=u'c', value=u'e')])

        samples_file = test_data.open_genomic_set_file(
            'Genomic-Test-Set-test-3.csv')

        input_filename = 'Genomic-Test-Set-v1%s.csv' % self\
          ._naive_utc_to_naive_central(clock.CLOCK.now())\
          .strftime(genomic_set_file_handler.INPUT_CSV_TIME_FORMAT)

        self._write_cloud_csv(input_filename, samples_file)

        genomic_pipeline.process_genomic_water_line()

        # verify result file
        bucket_name = config.getSetting(config.GENOMIC_SET_BUCKET_NAME)
        path = self._find_latest_genomic_set_csv(bucket_name,
                                                 'Validation-Result')
        csv_file = cloudstorage_api.open(path)
        csv_reader = csv.DictReader(csv_file, delimiter=',')

        class ResultCsvColumns(object):
            """Names of CSV columns that we read from the genomic set upload."""
            GENOMIC_SET_NAME = 'genomic_set_name'
            GENOMIC_SET_CRITERIA = 'genomic_set_criteria'
            PID = 'pid'
            BIOBANK_ORDER_ID = 'biobank_order_id'
            NY_FLAG = 'ny_flag'
            SEX_AT_BIRTH = 'sex_at_birth'
            GENOME_TYPE = 'genome_type'
            STATUS = 'status'
            INVALID_REASON = 'invalid_reason'

            ALL = (GENOMIC_SET_NAME, GENOMIC_SET_CRITERIA, PID,
                   BIOBANK_ORDER_ID, NY_FLAG, SEX_AT_BIRTH, GENOME_TYPE,
                   STATUS, INVALID_REASON)

        missing_cols = set(ResultCsvColumns.ALL) - set(csv_reader.fieldnames)
        self.assertEqual(len(missing_cols), 0)
        rows = list(csv_reader)
        self.assertEqual(len(rows), 4)
        self.assertEqual(rows[0][ResultCsvColumns.GENOMIC_SET_NAME],
                         'name_xxx')
        self.assertEqual(rows[0][ResultCsvColumns.GENOMIC_SET_CRITERIA],
                         'criteria_xxx')
        self.assertEqual(rows[0][ResultCsvColumns.STATUS], 'invalid')
        self.assertEqual(rows[0][ResultCsvColumns.INVALID_REASON],
                         'INVALID_AGE')
        self.assertEqual(rows[0][ResultCsvColumns.PID], '1')
        self.assertEqual(rows[0][ResultCsvColumns.BIOBANK_ORDER_ID], '1')
        self.assertEqual(rows[0][ResultCsvColumns.NY_FLAG], 'Y')
        self.assertEqual(rows[0][ResultCsvColumns.GENOME_TYPE], 'aou_wgs')
        self.assertEqual(rows[0][ResultCsvColumns.SEX_AT_BIRTH], 'M')

        self.assertEqual(rows[1][ResultCsvColumns.GENOMIC_SET_NAME],
                         'name_xxx')
        self.assertEqual(rows[1][ResultCsvColumns.GENOMIC_SET_CRITERIA],
                         'criteria_xxx')
        self.assertEqual(rows[1][ResultCsvColumns.STATUS], 'invalid')
        self.assertEqual(rows[1][ResultCsvColumns.INVALID_REASON],
                         'INVALID_CONSENT')
        self.assertEqual(rows[1][ResultCsvColumns.PID], '2')
        self.assertEqual(rows[1][ResultCsvColumns.BIOBANK_ORDER_ID], '2')
        self.assertEqual(rows[1][ResultCsvColumns.NY_FLAG], 'N')
        self.assertEqual(rows[1][ResultCsvColumns.GENOME_TYPE], 'aou_array')
        self.assertEqual(rows[1][ResultCsvColumns.SEX_AT_BIRTH], 'F')

        self.assertEqual(rows[2][ResultCsvColumns.GENOMIC_SET_NAME],
                         'name_xxx')
        self.assertEqual(rows[2][ResultCsvColumns.GENOMIC_SET_CRITERIA],
                         'criteria_xxx')
        self.assertEqual(rows[2][ResultCsvColumns.STATUS], 'invalid')
        self.assertEqual(rows[2][ResultCsvColumns.INVALID_REASON],
                         'INVALID_NY_ZIPCODE')
        self.assertEqual(rows[2][ResultCsvColumns.PID], '3')
        self.assertEqual(rows[2][ResultCsvColumns.BIOBANK_ORDER_ID], '3')
        self.assertEqual(rows[2][ResultCsvColumns.NY_FLAG], 'N')
        self.assertEqual(rows[2][ResultCsvColumns.GENOME_TYPE], 'aou_array')
        self.assertEqual(rows[2][ResultCsvColumns.SEX_AT_BIRTH], 'M')

        self.assertEqual(rows[3][ResultCsvColumns.GENOMIC_SET_NAME],
                         'name_xxx')
        self.assertEqual(rows[3][ResultCsvColumns.GENOMIC_SET_CRITERIA],
                         'criteria_xxx')
        self.assertEqual(rows[3][ResultCsvColumns.STATUS], 'invalid')
        self.assertEqual(rows[3][ResultCsvColumns.INVALID_REASON],
                         'INVALID_BIOBANK_ORDER_CLIENT_ID')
        self.assertEqual(rows[3][ResultCsvColumns.PID], '4')
        self.assertEqual(rows[3][ResultCsvColumns.BIOBANK_ORDER_ID], '4')
        self.assertEqual(rows[3][ResultCsvColumns.NY_FLAG], 'Y')
        self.assertEqual(rows[3][ResultCsvColumns.GENOME_TYPE], 'aou_wgs')
        self.assertEqual(rows[3][ResultCsvColumns.SEX_AT_BIRTH], 'F')
Exemplo n.º 5
0
    def test_end_to_end_valid_case(self):
        participant = self._make_participant()
        self._make_summary(participant)
        self._make_biobank_order(participantId=participant.participantId,
                                 biobankOrderId=participant.participantId,
                                 identifiers=[
                                     BiobankOrderIdentifier(
                                         system=u'https://www.pmi-ops.org',
                                         value=u'12345678')
                                 ])

        participant2 = self._make_participant()
        self._make_summary(participant2)
        self._make_biobank_order(participantId=participant2.participantId,
                                 biobankOrderId=participant2.participantId,
                                 identifiers=[
                                     BiobankOrderIdentifier(
                                         system=u'https://www.pmi-ops.org',
                                         value=u'12345679')
                                 ])

        participant3 = self._make_participant()
        self._make_summary(participant3)
        self._make_biobank_order(participantId=participant3.participantId,
                                 biobankOrderId=participant3.participantId,
                                 identifiers=[
                                     BiobankOrderIdentifier(
                                         system=u'https://www.pmi-ops.org',
                                         value=u'12345680')
                                 ])

        samples_file = test_data.open_genomic_set_file(
            'Genomic-Test-Set-test-2.csv')

        input_filename = 'Genomic-Test-Set-v1%s.csv' % self\
          ._naive_utc_to_naive_central(clock.CLOCK.now())\
          .strftime(genomic_set_file_handler.INPUT_CSV_TIME_FORMAT)

        self._write_cloud_csv(input_filename, samples_file)

        manifest_result_file = test_data.open_genomic_set_file(
            'Genomic-Manifest-Result-test.csv')

        manifest_result_filename = 'Genomic-Manifest-Result-AoU-1-v1%s.csv' % self \
          ._naive_utc_to_naive_central(clock.CLOCK.now()) \
          .strftime(genomic_set_file_handler.INPUT_CSV_TIME_FORMAT)

        self._write_cloud_csv(manifest_result_filename,
                              manifest_result_file,
                              bucket=_FAKE_BIOBANK_SAMPLE_BUCKET,
                              folder=_FAKE_BUCKET_RESULT_FOLDER)

        genomic_pipeline.process_genomic_water_line()

        # verify result file
        bucket_name = config.getSetting(config.GENOMIC_SET_BUCKET_NAME)
        path = self._find_latest_genomic_set_csv(bucket_name,
                                                 'Validation-Result')
        csv_file = cloudstorage_api.open(path)
        csv_reader = csv.DictReader(csv_file, delimiter=',')

        class ResultCsvColumns(object):
            """Names of CSV columns that we read from the genomic set upload."""
            GENOMIC_SET_NAME = 'genomic_set_name'
            GENOMIC_SET_CRITERIA = 'genomic_set_criteria'
            PID = 'pid'
            BIOBANK_ORDER_ID = 'biobank_order_id'
            NY_FLAG = 'ny_flag'
            SEX_AT_BIRTH = 'sex_at_birth'
            GENOME_TYPE = 'genome_type'
            STATUS = 'status'
            INVALID_REASON = 'invalid_reason'

            ALL = (GENOMIC_SET_NAME, GENOMIC_SET_CRITERIA, PID,
                   BIOBANK_ORDER_ID, NY_FLAG, SEX_AT_BIRTH, GENOME_TYPE,
                   STATUS, INVALID_REASON)

        missing_cols = set(ResultCsvColumns.ALL) - set(csv_reader.fieldnames)
        self.assertEqual(len(missing_cols), 0)
        rows = list(csv_reader)
        self.assertEqual(len(rows), 3)
        self.assertEqual(rows[0][ResultCsvColumns.GENOMIC_SET_NAME],
                         'name_xxx')
        self.assertEqual(rows[0][ResultCsvColumns.GENOMIC_SET_CRITERIA],
                         'criteria_xxx')
        self.assertEqual(rows[0][ResultCsvColumns.STATUS], 'valid')
        self.assertEqual(rows[0][ResultCsvColumns.INVALID_REASON], '')
        self.assertEqual(rows[0][ResultCsvColumns.PID], '1')
        self.assertEqual(rows[0][ResultCsvColumns.BIOBANK_ORDER_ID], '1')
        self.assertEqual(rows[0][ResultCsvColumns.NY_FLAG], 'Y')
        self.assertEqual(rows[0][ResultCsvColumns.GENOME_TYPE], 'aou_wgs')
        self.assertEqual(rows[0][ResultCsvColumns.SEX_AT_BIRTH], 'M')

        self.assertEqual(rows[1][ResultCsvColumns.GENOMIC_SET_NAME],
                         'name_xxx')
        self.assertEqual(rows[1][ResultCsvColumns.GENOMIC_SET_CRITERIA],
                         'criteria_xxx')
        self.assertEqual(rows[1][ResultCsvColumns.STATUS], 'valid')
        self.assertEqual(rows[1][ResultCsvColumns.INVALID_REASON], '')
        self.assertEqual(rows[1][ResultCsvColumns.PID], '2')
        self.assertEqual(rows[1][ResultCsvColumns.BIOBANK_ORDER_ID], '2')
        self.assertEqual(rows[1][ResultCsvColumns.NY_FLAG], 'N')
        self.assertEqual(rows[1][ResultCsvColumns.GENOME_TYPE], 'aou_array')
        self.assertEqual(rows[1][ResultCsvColumns.SEX_AT_BIRTH], 'F')

        self.assertEqual(rows[2][ResultCsvColumns.GENOMIC_SET_NAME],
                         'name_xxx')
        self.assertEqual(rows[2][ResultCsvColumns.GENOMIC_SET_CRITERIA],
                         'criteria_xxx')
        self.assertEqual(rows[2][ResultCsvColumns.STATUS], 'valid')
        self.assertEqual(rows[2][ResultCsvColumns.INVALID_REASON], '')
        self.assertEqual(rows[2][ResultCsvColumns.PID], '3')
        self.assertEqual(rows[2][ResultCsvColumns.BIOBANK_ORDER_ID], '3')
        self.assertEqual(rows[2][ResultCsvColumns.NY_FLAG], 'N')
        self.assertEqual(rows[2][ResultCsvColumns.GENOME_TYPE], 'aou_array')
        self.assertEqual(rows[2][ResultCsvColumns.SEX_AT_BIRTH], 'M')

        # verify manifest files
        bucket_name = config.getSetting(config.BIOBANK_SAMPLES_BUCKET_NAME)

        class ExpectedCsvColumns(object):
            VALUE = 'value'
            BIOBANK_ID = 'biobank_id'
            SEX_AT_BIRTH = 'sex_at_birth'
            GENOME_TYPE = 'genome_type'
            NY_FLAG = 'ny_flag'
            REQUEST_ID = 'request_id'
            PACKAGE_ID = 'package_id'

            ALL = (VALUE, SEX_AT_BIRTH, GENOME_TYPE, NY_FLAG, REQUEST_ID,
                   PACKAGE_ID)

        path = self._find_latest_genomic_set_csv(bucket_name, 'Manifest')
        csv_file = cloudstorage_api.open(path)
        csv_reader = csv.DictReader(csv_file, delimiter=',')

        missing_cols = set(ExpectedCsvColumns.ALL) - set(csv_reader.fieldnames)
        self.assertEqual(len(missing_cols), 0)
        rows = list(csv_reader)
        self.assertEqual(rows[0][ExpectedCsvColumns.VALUE], '12345678')
        self.assertEqual(rows[0][ExpectedCsvColumns.BIOBANK_ID], '1')
        self.assertEqual(rows[0][ExpectedCsvColumns.SEX_AT_BIRTH], 'M')
        self.assertEqual(rows[0][ExpectedCsvColumns.GENOME_TYPE], 'aou_wgs')
        self.assertEqual(rows[0][ExpectedCsvColumns.NY_FLAG], 'Y')
        self.assertEqual(rows[1][ExpectedCsvColumns.VALUE], '12345679')
        self.assertEqual(rows[1][ExpectedCsvColumns.BIOBANK_ID], '2')
        self.assertEqual(rows[1][ExpectedCsvColumns.SEX_AT_BIRTH], 'F')
        self.assertEqual(rows[1][ExpectedCsvColumns.GENOME_TYPE], 'aou_array')
        self.assertEqual(rows[1][ExpectedCsvColumns.NY_FLAG], 'N')
        self.assertEqual(rows[2][ExpectedCsvColumns.VALUE], '12345680')
        self.assertEqual(rows[2][ExpectedCsvColumns.BIOBANK_ID], '3')
        self.assertEqual(rows[2][ExpectedCsvColumns.SEX_AT_BIRTH], 'M')
        self.assertEqual(rows[2][ExpectedCsvColumns.GENOME_TYPE], 'aou_array')
        self.assertEqual(rows[2][ExpectedCsvColumns.NY_FLAG], 'N')

        # verify manifest result files
        bucket_name = config.getSetting(config.BIOBANK_SAMPLES_BUCKET_NAME)

        class ExpectedCsvColumns(object):
            VALUE = 'value'
            BIOBANK_ID = 'biobank_id'
            SEX_AT_BIRTH = 'sex_at_birth'
            GENOME_TYPE = 'genome_type'
            NY_FLAG = 'ny_flag'
            REQUEST_ID = 'request_id'
            PACKAGE_ID = 'package_id'

            ALL = (VALUE, SEX_AT_BIRTH, GENOME_TYPE, NY_FLAG, REQUEST_ID,
                   PACKAGE_ID)

        path = self._find_latest_genomic_set_csv(bucket_name,
                                                 'Manifest-Result')
        csv_file = cloudstorage_api.open(path)
        csv_reader = csv.DictReader(csv_file, delimiter=',')

        missing_cols = set(ExpectedCsvColumns.ALL) - set(csv_reader.fieldnames)
        self.assertEqual(len(missing_cols), 0)
        rows = list(csv_reader)
        self.assertEqual(rows[0][ExpectedCsvColumns.VALUE], '12345678')
        self.assertEqual(rows[0][ExpectedCsvColumns.BIOBANK_ID], '1')
        self.assertEqual(rows[0][ExpectedCsvColumns.SEX_AT_BIRTH], 'M')
        self.assertEqual(rows[0][ExpectedCsvColumns.GENOME_TYPE], 'aou_wgs')
        self.assertEqual(rows[0][ExpectedCsvColumns.NY_FLAG], 'Y')
        self.assertEqual(rows[0][ExpectedCsvColumns.PACKAGE_ID],
                         'PKG-XXXX-XXXX1')

        self.assertEqual(rows[1][ExpectedCsvColumns.VALUE], '12345679')
        self.assertEqual(rows[1][ExpectedCsvColumns.BIOBANK_ID], '2')
        self.assertEqual(rows[1][ExpectedCsvColumns.SEX_AT_BIRTH], 'F')
        self.assertEqual(rows[1][ExpectedCsvColumns.GENOME_TYPE], 'aou_array')
        self.assertEqual(rows[1][ExpectedCsvColumns.NY_FLAG], 'N')
        self.assertEqual(rows[1][ExpectedCsvColumns.PACKAGE_ID],
                         'PKG-XXXX-XXXX2')

        self.assertEqual(rows[2][ExpectedCsvColumns.VALUE], '12345680')
        self.assertEqual(rows[2][ExpectedCsvColumns.BIOBANK_ID], '3')
        self.assertEqual(rows[2][ExpectedCsvColumns.SEX_AT_BIRTH], 'M')
        self.assertEqual(rows[2][ExpectedCsvColumns.GENOME_TYPE], 'aou_array')
        self.assertEqual(rows[2][ExpectedCsvColumns.NY_FLAG], 'N')
        self.assertEqual(rows[2][ExpectedCsvColumns.PACKAGE_ID],
                         'PKG-XXXX-XXXX3')

        # verify package id in database
        member_dao = GenomicSetMemberDao()
        members = member_dao.get_all()
        for member in members:
            self.assertIn(
                member.packageId,
                ['PKG-XXXX-XXXX1', 'PKG-XXXX-XXXX2', 'PKG-XXXX-XXXX3'])