Exemplo n.º 1
0
    def test_copy_rdr_tables(self):
        """Check that every RDR table is copied into the combined dataset.

        For each table in ``RDR_TABLES_TO_COPY`` the test asserts that the
        table is absent from the combined dataset, calls ``copy_rdr_table``,
        asserts that the table now exists, and finally compares the row count
        of the RDR table with the row count of its combined copy.
        """
        for table in RDR_TABLES_TO_COPY:
            # Sanity check: the copy must not already be there, otherwise the
            # existence assertion below would prove nothing.
            self.assertFalse(
                bq_utils.table_exists(table, self.combined_dataset_id),
                msg='Table {table} should not exist before the copy'.format(
                    table=table))
            copy_rdr_table(table)
            actual = bq_utils.table_exists(table, self.combined_dataset_id)
            self.assertTrue(
                actual,
                msg='RDR table {table} should be copied'.format(table=table))

            # Check that row count in combined is same as rdr
            query = (
                'WITH rdr AS '
                ' (SELECT COUNT(1) n FROM `{rdr_dataset_id}.{table}`), '
                'combined AS '
                ' (SELECT COUNT(1) n FROM `{combined_dataset_id}.{table}`) '
                'SELECT '
                'rdr.n AS rdr_count, '
                'combined.n AS combined_count '
                'FROM rdr, combined ').format(
                    rdr_dataset_id=self.rdr_dataset_id,
                    combined_dataset_id=self.combined_dataset_id,
                    table=table)
            response = bq_utils.query(query)
            rows = bq_utils.response2rows(response)
            # Sanity check: the query joins two single-row aggregates, so
            # exactly one row is expected. assertEqual reports the actual
            # length on failure instead of a bare "False is not true".
            self.assertEqual(
                len(rows), 1,
                msg='Expected exactly one row of counts for table {table}'.
                format(table=table))
            row = rows[0]
            rdr_count, combined_count = row['rdr_count'], row['combined_count']
            msg_fmt = 'Table {table} has {rdr_count} in rdr and {combined_count} in combined (expected to be equal)'
            self.assertEqual(
                rdr_count, combined_count,
                msg_fmt.format(table=table,
                               rdr_count=rdr_count,
                               combined_count=combined_count))
Exemplo n.º 2
0
    def test_ehr_person_to_observation(self):
        """Check that EHR person rows are converted to observation records.

        After creating the CDM tables, copying the RDR ``person`` table and
        running ``move_ehr_person_to_observation``, the test reads the EHR
        ``person`` table and the combined ``observation`` table and asserts
        that, for gender, race, date of birth and ethnicity, the observation
        rows match what is derived from the person rows. It finally asserts
        that the observation table holds four rows per EHR person row.
        """

        def struct_rows(query):
            # Both queries select a single parenthesised tuple, so every
            # result row is read from the 'F0_' entry of the payload and its
            # field values are unwrapped from the {'f': [{'v': ...}]} layout.
            # NOTE(review): 'F0_' is presumably the auto-generated name of the
            # anonymous STRUCT column -- confirm against
            # query_result_to_payload.
            payload = query_result_to_payload(bq_utils.query(query))
            return [[item['v'] for item in row['f']] for row in payload['F0_']]

        # ehr person table converts to observation records
        create_cdm_tables()
        copy_rdr_table('person')
        move_ehr_person_to_observation()
        # person table query
        q_person = '''
            SELECT (person_id,
                    gender_concept_id,
                    gender_source_value,
                    race_concept_id,
                    race_source_value,
                    CAST(birth_datetime as STRING),
                    ethnicity_concept_id,
                    ethnicity_source_value,
                    EXTRACT(DATE FROM birth_datetime))
            FROM {ehr_dataset_id}.person
        '''.format(ehr_dataset_id=self.ehr_dataset_id)
        response_ehr_person = struct_rows(q_person)
        # observation table query; it reads from the combined dataset, so the
        # placeholder is named accordingly (the rendered SQL is unchanged)
        q_obs = '''
            SELECT (person_id,
                    observation_concept_id,
                    value_as_concept_id,
                    value_as_string,
                    observation_source_value,
                    observation_date)
            FROM {combined_dataset_id}.observation obs
            WHERE   obs.observation_concept_id=4013886 -- Race - 4013886
                OR  obs.observation_concept_id=4271761 -- Ethnic group - 4271761
                OR  obs.observation_concept_id=4135376 -- Gender - 4135376
                OR  obs.observation_concept_id=4083587 -- DOB - 4083587
        '''.format(combined_dataset_id=self.combined_dataset_id)
        response_obs = struct_rows(q_obs)
        # concept ids, kept as strings because they are compared with the
        # raw values taken from the query responses
        gender_concept_id = '4135376'
        race_concept_id = '4013886'
        dob_concept_id = '4083587'
        ethnicity_concept_id = '4271761'

        # expected lists: (person_id, observation concept, value, date).
        # Person row indexes follow the q_person column order:
        # 1 gender_concept_id, 3 race_concept_id, 5 birth_datetime string,
        # 6 ethnicity_concept_id, 8 birth date.
        expected_gender_list = [(row[0], gender_concept_id, row[1], row[8])
                                for row in response_ehr_person]
        expected_race_list = [(row[0], race_concept_id, row[3], row[8])
                              for row in response_ehr_person]
        expected_dob_list = [(row[0], dob_concept_id, row[5], row[8])
                             for row in response_ehr_person]
        expected_ethnicity_list = [(row[0], ethnicity_concept_id, row[6],
                                    row[8]) for row in response_ehr_person]

        # actual lists. Observation row indexes follow the q_obs column order:
        # 1 observation_concept_id, 2 value_as_concept_id, 3 value_as_string,
        # 5 observation_date. Only DOB is compared through value_as_string.
        actual_gender_list = [(row[0], row[1], row[2], row[5])
                              for row in response_obs
                              if row[1] == gender_concept_id]
        actual_race_list = [(row[0], row[1], row[2], row[5])
                            for row in response_obs
                            if row[1] == race_concept_id]
        actual_dob_list = [(row[0], row[1], row[3], row[5])
                           for row in response_obs if row[1] == dob_concept_id]
        actual_ethnicity_list = [(row[0], row[1], row[2], row[5])
                                 for row in response_obs
                                 if row[1] == ethnicity_concept_id]

        # Both sides are sorted so the comparison ignores result row order.
        self.assertListEqual(sorted(expected_gender_list),
                             sorted(actual_gender_list), 'gender check fails')
        self.assertListEqual(sorted(expected_race_list),
                             sorted(actual_race_list), 'race check fails')
        self.assertListEqual(sorted(expected_dob_list),
                             sorted(actual_dob_list), 'dob check fails')
        self.assertListEqual(sorted(expected_ethnicity_list),
                             sorted(actual_ethnicity_list),
                             'ethnicity check fails')

        person_ehr_row_count = int(
            bq_utils.get_table_info('person', self.ehr_dataset_id)['numRows'])
        obs_row_count = int(
            bq_utils.get_table_info('observation',
                                    self.combined_dataset_id)['numRows'])

        # One observation each for gender, race, DOB and ethnicity.
        # NOTE(review): this assumes the combined observation table holds no
        # other rows at this point -- confirm against the test fixture.
        self.assertEqual(
            person_ehr_row_count * 4, obs_row_count,
            'expected 4 observation rows per ehr person row '
            '({persons} person rows, {obs} observation rows)'.format(
                persons=person_ehr_row_count, obs=obs_row_count))