def _test_table_hpo_subquery(self):
    """
    Build the HPO subqueries for person, visit_occurrence and condition_occurrence

    Each subquery is generated for CHS_HPO_ID with the dataset identifiers
    'input' and 'output'.

    NOTE(review): the generated queries are only assigned to locals here; no
    assertion is made on them in this method.
    """
    # Arguments shared by every call below
    shared_args = dict(hpo_id=CHS_HPO_ID,
                       input_dataset_id='input',
                       output_dataset_id='output')

    # person is a simple select, no ids should be mapped
    person_query = ehr_union.table_hpo_subquery('person', **shared_args)

    # _mapping_visit_occurrence(src_table_id, src_visit_occurrence_id, visit_occurrence_id)
    # visit_occurrence_id should be mapped
    visit_occurrence_query = ehr_union.table_hpo_subquery(
        'visit_occurrence', **shared_args)

    # visit_occurrence_id and condition_occurrence_id should be mapped
    condition_occurrence_query = ehr_union.table_hpo_subquery(
        'condition_occurrence', **shared_args)
def get_table_hpo_subquery_error(self, table, dataset_in, dataset_out):
    """
    Check the subquery generated for `table` (for NYC_HPO_ID) and report the first problem found

    The generated SQL is parsed and three things are verified in order:
      1. the statement is a select
      2. the first object in FROM is the input table, prefixed by `dataset_in`
      3. every primary key / implemented foreign key field yields a join with
         the expected mapping table, prefixed by `dataset_out`

    NOTE(review): another definition with this same name appears later in this
    file; if both live in the same class the later one takes precedence.

    :param table: name of the table whose subquery is validated
    :param dataset_in: dataset prefix expected on the input table
    :param dataset_out: dataset prefix expected on the mapping tables
    :return: a formatted SUBQUERY_FAIL_MSG for the first failed check,
        None when every check passes
    """
    sql = ehr_union.table_hpo_subquery(table, NYC_HPO_ID, dataset_in,
                                       dataset_out)
    # moz-sql-parser cannot handle the ROW_NUMBER() OVER() analytic function,
    # so that expression is stripped before parsing.
    sql = re.sub(
        r",\s+ROW_NUMBER\(\) OVER \(PARTITION BY nm\..+?_id\) AS row_num",
        " ", sql)
    parsed = moz_sql_parser.parse(sql)

    def failure(expr, expected, actual):
        # Single place where the failure message is assembled
        return SUBQUERY_FAIL_MSG.format(expr=expr,
                                        table=table,
                                        expected=expected,
                                        actual=actual,
                                        subquery=sql)

    # Sanity check: the parsed statement must be a select
    if 'select' not in parsed:
        return failure('query type', 'select', str(parsed))

    # The input table should come first in the FROM expression
    from_values = dpath.util.values(parsed, 'from/0/value/from/value')
    if not from_values:
        from_values = dpath.util.values(parsed, 'from')
    found_from = first_or_none(from_values)
    wanted_from = dataset_in + '.' + bq_utils.get_table_id(NYC_HPO_ID, table)
    if wanted_from != found_from:
        return failure('first object in FROM', wanted_from, found_from)

    # All key fields (primary or foreign) should be joined with their mapping tables.
    # Note: join order in the subquery is assumed to follow field order in the json file
    primary_key = table + '_id'
    join_index = 0
    for field in resources.fields_for(table):
        field_name = field['name']
        if field_name not in self.mapped_fields:
            continue
        # TODO increment join_index here when the solution is generalized to all foreign keys
        if field_name == primary_key:
            # Primary key: this table's own mapping table should be INNER joined
            join_index += 1
            check = 'inner join on primary key'
            found_join = first_or_none(
                dpath.util.values(parsed, 'from/%s/join/value' % join_index))
            wanted_join = dataset_out + '.' + ehr_union.mapping_table_for(
                table)
        elif field_name in self.implemented_foreign_keys:
            # Foreign key: the referenced table's mapping table should be LEFT joined
            join_index += 1
            check = 'left join on foreign key'
            found_join = first_or_none(
                dpath.util.values(parsed,
                                  'from/%s/left join/value' % join_index))
            referenced_table = field_name.replace('_id', '')
            wanted_join = dataset_out + '.' + ehr_union.mapping_table_for(
                referenced_table)
        else:
            # Mapped field with no join check implemented; nothing to compare
            continue
        if wanted_join != found_join:
            return failure(check, wanted_join, found_join)
def get_table_hpo_subquery_error(self, table, dataset_in, dataset_out):
    """
    Check the subquery generated for `table` (for NYC_HPO_ID) and report the first problem found

    The generated SQL is adjusted so moz_sql_parser can parse it, then three
    things are verified in order:
      1. the statement is a select
      2. the first object in FROM is the input table, prefixed by `dataset_in`
      3. every primary key / implemented foreign key field yields a join with
         the expected mapping table, prefixed by `dataset_out`

    :param table: name of the table whose subquery is validated
    :param dataset_in: dataset prefix expected on the input table
    :param dataset_out: dataset prefix expected on the mapping tables
    :return: a formatted SUBQUERY_FAIL_MSG for the first failed check,
        None when every check passes
    """
    sql = ehr_union.table_hpo_subquery(table, NYC_HPO_ID, dataset_in,
                                       dataset_out)
    # moz-sql-parser cannot handle the ROW_NUMBER() OVER() analytic function,
    # so that expression is stripped before parsing.
    sql = re.sub(
        r",\s+ROW_NUMBER\(\) OVER \(PARTITION BY nm\..+?_id\) AS row_num",
        " ", sql)
    # offset is used as a column name in the note_nlp table. BigQuery accepts
    # it, but moz_sql_parser identifies it as a SQL keyword, so it is quoted
    # here as a test-only workaround (a no-op when 'offset,' is absent).
    sql = sql.replace('offset,', '"offset",')
    parsed = moz_sql_parser.parse(sql)

    def failure(expr, expected, actual):
        # Single place where the failure message is assembled
        return SUBQUERY_FAIL_MSG.format(expr=expr,
                                        table=table,
                                        expected=expected,
                                        actual=actual,
                                        subquery=sql)

    # Sanity check: the parsed statement must be a select
    if 'select' not in parsed:
        return failure('query type', 'select', str(parsed))

    # The input table should come first in the FROM expression
    from_values = dpath.util.values(parsed, 'from/0/value/from/value')
    if not from_values:
        from_values = dpath.util.values(parsed, 'from')
    found_from = first_or_none(from_values)
    wanted_from = dataset_in + '.' + bq_utils.get_table_id(NYC_HPO_ID, table)
    if wanted_from != found_from:
        return failure('first object in FROM', wanted_from, found_from)

    # All key fields (primary or foreign) should be joined with their mapping tables.
    # Note: join order in the subquery is assumed to follow field order in the json file
    primary_key = table + '_id'
    join_index = 0
    for field in resources.fields_for(table):
        field_name = field['name']
        if field_name not in self.mapped_fields:
            continue
        # TODO increment join_index here when the solution is generalized to all foreign keys
        if field_name == primary_key:
            # Primary key: this table's own mapping table should be INNER joined
            join_index += 1
            check = 'inner join on primary key'
            found_join = first_or_none(
                dpath.util.values(parsed, 'from/%s/join/value' % join_index))
            wanted_join = dataset_out + '.' + ehr_union.mapping_table_for(
                table)
        elif field_name in self.implemented_foreign_keys:
            # Foreign key: the referenced table's mapping table should be LEFT joined
            join_index += 1
            check = 'left join on foreign key'
            # In visit_detail the 'visit_occurrence' column comes after
            # 'care_site', unlike other cdm tables where 'visit_occurrence'
            # precedes the other foreign keys. The expected query keeps the
            # usual order, so the two parsed joins are swapped to line up
            # with the order in which the fields are walked here.
            if table == 'visit_detail' and join_index == 2:
                from_items = parsed['from']
                from_items[2], from_items[3] = from_items[3], from_items[2]
            found_join = first_or_none(
                dpath.util.values(parsed,
                                  'from/%s/left join/value' % join_index))
            referenced_table = field_name.replace('_id', '')
            wanted_join = dataset_out + '.' + ehr_union.mapping_table_for(
                referenced_table)
        else:
            # Mapped field with no join check implemented; nothing to compare
            continue
        if wanted_join != found_join:
            return failure(check, wanted_join, found_join)