def ingest_sample_frame_goi_filter_options():
    headers = {'Authorization': 'Bearer {}'.format(AIRTABLE_API_KEY)}
    data = airtable_get_request(AIRTABLE_SAMPLE_FRAME_GOI_OPTIONS_REQUEST_URL, headers)

    # Try to get records from data if the request was successful
    try:
        records = data["records"]
        current_time = datetime.now()
        # Add records to table
        with db_session() as session:
            records_to_add = [PopulationGroupOptions(id=uuid4(),
                                                     name=record['fields']['Name'],
                                                     french_name=record['fields'].get('French Name'),
                                                     german_name=record['fields'].get('German Name'),
                                                     order=record['fields'].get('Order'),
                                                     created_at=current_time)
                              for record in records if record['fields']]
            session.bulk_save_objects(records_to_add)
            session.commit()
        return
    except KeyError as e:
        handle_airtable_error(e, data, AIRTABLE_SAMPLE_FRAME_GOI_OPTIONS_REQUEST_URL, headers)
        return
def get_all_filter_options() -> Dict[str, Any]:
    with db_session() as session:
        options = get_filter_static_options()

        # Get countries, sorted in alphabetical order
        query = session.query(distinct(Country.country_name))
        results = [q[0] for q in query if q[0] is not None]
        options["country"] = sorted(results)

        # Get genpop
        query = session.query(distinct(ResearchSource.genpop))
        results = [q[0] for q in query if q[0] is not None]
        options["genpop"] = sorted(results)

        # Get subgroup_var
        query = session.query(distinct(DashboardSource.subgroup_var))
        results = [q[0] for q in query if q[0] is not None]
        options["subgroup_var"] = sorted(results)

        # Get subgroup_cat
        query = session.query(distinct(ResearchSource.subgroup_cat))
        results = [q[0] for q in query if q[0] is not None]
        options["subgroup_cat"] = sorted(results)

        # Get state
        query = session.query(distinct(State.state_name))
        results = [q[0] for q in query if q[0] is not None]
        options["state"] = sorted(results)

        # Get city
        query = session.query(distinct(City.city_name))
        results = [q[0] for q in query if q[0] is not None]
        options["city"] = sorted(results)

        # Only surface the Spike and Nucleocapsid antibody target options, because these are the
        # only options relevant for interpreting seroprevalence data in the context of vaccines
        query = session.query(distinct(AntibodyTarget.antibody_target_name))
        results = [q[0] for q in query if q[0] is not None and q[0] in ['Spike', 'Nucleocapsid (N-protein)']]
        options["antibody_target"] = sorted(results)

        options["max_sampling_end_date"] = session.query(func.max(DashboardSource.sampling_end_date)).scalar().isoformat()
        options["min_sampling_end_date"] = session.query(func.min(DashboardSource.sampling_end_date)).scalar().isoformat()
        options["max_publication_end_date"] = session.query(func.max(DashboardSource.publication_date)).scalar().isoformat()
        options["min_publication_end_date"] = session.query(func.min(DashboardSource.publication_date)).scalar().isoformat()
        options["updated_at"] = session.query(func.max(DashboardSource.publication_date)).scalar().isoformat()

        # Get population group options
        results = session.query(PopulationGroupOptions.order,
                                PopulationGroupOptions.name,
                                PopulationGroupOptions.french_name,
                                PopulationGroupOptions.german_name)
        # result[0]: Order associated with filter option, records are sorted by this Order
        # result[1]: English translation of filter option
        # result[2]: French translation of filter option
        # result[3]: German translation of filter option
        options["population_group"] = [{"english": result[1], "french": result[2], "german": result[3]}
                                       for result in sorted(results, key=lambda result: result[0])]
        return options
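# For reference, a sketch of the dict shape returned by get_all_filter_options(), assuming a
# populated database (plus whatever keys get_filter_static_options() contributes); the values
# below are illustrative, not real data:
#
#     {
#         "country": ["Canada", "France", ...],
#         "genpop": [...],
#         "subgroup_var": [...],
#         "subgroup_cat": [...],
#         "state": [...],
#         "city": [...],
#         "antibody_target": ["Nucleocapsid (N-protein)", "Spike"],
#         "max_sampling_end_date": "2021-06-30",
#         "min_sampling_end_date": "2020-01-15",
#         "max_publication_end_date": "2021-07-10",
#         "min_publication_end_date": "2020-02-01",
#         "updated_at": "2021-07-10",
#         "population_group": [{"english": "...", "french": "...", "german": "..."}, ...],
#     }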
def get_table_counts(self):
    table_counts = {}
    with db_session() as session:
        for table in [DashboardSource, ResearchSource, Country, State, City, TestManufacturer,
                      AntibodyTarget, CityBridge, StateBridge, TestManufacturerBridge, AntibodyTargetBridge]:
            # Inspect primary key of table
            pk = inspect(table).primary_key[0].key
            # Count on the primary key instead of using the .count() method for perf reasons
            table_counts[table.__tablename__] = session.query(func.count(getattr(table, pk))).scalar()
    return table_counts
def drop_table_entries(current_time: datetime, drop_old: bool = True):
    for table in table_names_dict.values():
        with db_session() as session:
            if drop_old:
                # Drop old records: anything not stamped with the current ingestion time
                session.query(table).filter(or_(table.created_at != current_time,
                                                table.created_at.is_(None))).delete()
            else:
                # Drop new records: anything stamped with the current ingestion time
                session.query(table).filter(or_(table.created_at == current_time,
                                                table.created_at.is_(None))).delete()
            session.commit()
    return
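# Usage sketch of the created_at swap pattern that drop_table_entries supports (hypothetical
# orchestration code; ingest_all_tables is a placeholder, not a function in this module). Every
# row written during one ingestion run is stamped with a single shared timestamp, so a successful
# run drops everything NOT stamped with that time, and a failed run rolls back by dropping
# everything that IS stamped with it:
#
#     current_time = datetime.now()
#     try:
#         ingest_all_tables(current_time)                   # hypothetical: stamps new rows with created_at=current_time
#         drop_table_entries(current_time, drop_old=True)   # success: keep only this run's rows
#     except Exception:
#         drop_table_entries(current_time, drop_old=False)  # failure: discard this run's rows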
def get_record_details(source_id):
    result = None
    with db_session() as session:
        try:
            # Construct case-when expression to generate isotype column based on isotype bool cols
            isotype_case_expression = _get_isotype_col_expression()

            # Build list of columns to use in query, starting with airtable source columns
            fields_list = []
            for col in dashboard_source_cols:
                fields_list.append(getattr(DashboardSource, col))

            # Store info about supplementary tables to join to airtable source
            table_infos = db_model_config['supplementary_table_info']

            # Add columns from supplementary tables, the country name, and the isotype col expression
            for sup_table in table_infos:
                fields_list.append(getattr(sup_table['main_table'],
                                           f"{sup_table['entity']}_name").label(sup_table['entity']))
            fields_list.append(Country.country_name.label('country'))
            query = session.query(*fields_list, isotype_case_expression)

            # Build query to join supplementary tables to airtable source
            for sup_table in table_infos:
                main_table = sup_table['main_table']
                bridge_table = sup_table['bridge_table']
                entity_id = f"{sup_table['entity']}_id"
                query = query.outerjoin(bridge_table, bridge_table.source_id == DashboardSource.source_id)\
                    .outerjoin(main_table, getattr(bridge_table, entity_id) == getattr(main_table, entity_id))

            # Join on country table
            query = query.outerjoin(Country, Country.country_id == DashboardSource.country_id)

            # Filter by input source id and convert results to dicts
            query = query.filter(DashboardSource.source_id == source_id)
            result = [x._asdict() for x in query.all()]

            # If multiple records are returned, parse results to return one record
            if len(result) > 1:
                result = _get_parsed_record(result)
            else:
                result = result[0]

            # Convert dates to use isoformat
            if result['sampling_end_date'] is not None:
                result['sampling_end_date'] = result['sampling_end_date'].isoformat()
            if result['sampling_start_date'] is not None:
                result['sampling_start_date'] = result['sampling_start_date'].isoformat()
        except Exception as e:
            print(e)
    return result
def add_test_adjustments(df: pd.DataFrame) -> pd.DataFrame:
    # Query records in our database
    with db_session() as session:
        total_db_records = session.query(DashboardSource.serum_pos_prevalence,
                                         DashboardSource.test_adj,
                                         DashboardSource.sensitivity,
                                         DashboardSource.specificity,
                                         DashboardSource.test_type,
                                         DashboardSource.denominator_value,
                                         DashboardSource.adj_prevalence,
                                         ResearchSource.ind_se,
                                         ResearchSource.ind_sp,
                                         ResearchSource.ind_se_n,
                                         ResearchSource.ind_sp_n,
                                         ResearchSource.se_n,
                                         ResearchSource.sp_n,
                                         ResearchSource.test_validation,
                                         ResearchSource.airtable_record_id) \
            .join(ResearchSource, ResearchSource.source_id == DashboardSource.source_id, isouter=True).all()
        total_db_records = [q._asdict() for q in total_db_records]
        total_db_records = pd.DataFrame(data=total_db_records)

    # Concat old and new records and fillna with 0 (NaN and None both become 0 so values are standardized)
    diff = pd.concat([df, total_db_records])
    diff.fillna(0, inplace=True)

    # Convert numeric cols to float (some of these come out of airtable as strings so need to standardize types)
    float_cols = ['ind_se', 'ind_sp', 'ind_se_n', 'ind_sp_n', 'se_n', 'sp_n', 'sensitivity',
                  'specificity', 'denominator_value', 'serum_pos_prevalence']
    diff[float_cols] = diff[float_cols].astype(float)

    # Round float columns to a consistent number of decimal places to ensure consistent float comparisons
    diff[float_cols] = diff[float_cols].round(5)

    # Drop duplicates based on these cols
    duplicate_cols = ['airtable_record_id', 'test_adj', 'ind_se', 'ind_sp', 'ind_se_n', 'ind_sp_n',
                      'se_n', 'sp_n', 'sensitivity', 'specificity', 'test_validation', 'test_type',
                      'denominator_value', 'serum_pos_prevalence']
    diff = diff.drop_duplicates(subset=duplicate_cols, keep=False)

    # Get all unique airtable_record_ids that are new/have been modified
    new_airtable_record_ids = diff['airtable_record_id'].unique()

    # Get all rows from airtable data that need to be test adjusted, and ones that don't
    old_airtable_test_adj_records = \
        df[~df['airtable_record_id'].isin(new_airtable_record_ids)].reset_index(drop=True)
    new_airtable_test_adj_records = \
        df[df['airtable_record_id'].isin(new_airtable_record_ids)].reset_index(drop=True)

    # Add temporary boolean column indicating whether the record will be test adjusted or not
    old_airtable_test_adj_records['test_adjusted_record'] = False
    new_airtable_test_adj_records['test_adjusted_record'] = True

    # Only proceed with test adjustment if there are new unadjusted records
    if not new_airtable_test_adj_records.empty:
        # Apply test adjustment to new_airtable_test_adj_records and add 6 new columns
        test_adj_handler = TestAdjHandler()
        new_airtable_test_adj_records['adj_prevalence'], \
            new_airtable_test_adj_records['adj_sensitivity'], \
            new_airtable_test_adj_records['adj_specificity'], \
            new_airtable_test_adj_records['ind_eval_type'], \
            new_airtable_test_adj_records['adj_prev_ci_lower'], \
            new_airtable_test_adj_records['adj_prev_ci_upper'] = \
            zip(*new_airtable_test_adj_records.apply(
                lambda x: test_adj_handler.get_adjusted_estimate(test_adj=x['test_adj'],
                                                                 ind_se=x['ind_se'],
                                                                 ind_sp=x['ind_sp'],
                                                                 ind_se_n=x['ind_se_n'],
                                                                 ind_sp_n=x['ind_sp_n'],
                                                                 se_n=x['se_n'],
                                                                 sp_n=x['sp_n'],
                                                                 sensitivity=x['sensitivity'],
                                                                 specificity=x['specificity'],
                                                                 test_validation=x['test_validation'],
                                                                 test_type=x['test_type'],
                                                                 denominator_value=x['denominator_value'],
                                                                 serum_pos_prevalence=x['serum_pos_prevalence']),
                axis=1))

    # If there are no old test adjusted records, just return the new ones
    if old_airtable_test_adj_records.empty:
        return new_airtable_test_adj_records

    # Add test adjustment data to old_airtable_test_adj_records from the database
    old_airtable_record_ids = old_airtable_test_adj_records['airtable_record_id'].unique()

    # Query those records in our database
    with db_session() as session:
        old_db_test_adj_records = session.query(DashboardSource.adj_prevalence,
                                                DashboardSource.adj_prev_ci_lower,
                                                DashboardSource.adj_prev_ci_upper,
                                                ResearchSource.adj_sensitivity,
                                                ResearchSource.adj_specificity,
                                                ResearchSource.ind_eval_type,
                                                ResearchSource.airtable_record_id) \
            .join(ResearchSource, ResearchSource.source_id == DashboardSource.source_id, isouter=True) \
            .filter(ResearchSource.airtable_record_id.in_(old_airtable_record_ids)).all()
        old_db_test_adj_records = [q._asdict() for q in old_db_test_adj_records]
        old_db_test_adj_records = pd.DataFrame(data=old_db_test_adj_records)

    # Drop the test adjustment data that is currently in airtable, and keep the values in the DB
    old_airtable_test_adj_records = old_airtable_test_adj_records.drop(
        columns=['adj_prevalence', 'adj_prev_ci_lower', 'adj_prev_ci_upper',
                 'adj_sensitivity', 'adj_specificity', 'ind_eval_type'])

    # Join old_airtable_test_adj_records with old_db_test_adj_records
    old_airtable_test_adj_records = \
        old_airtable_test_adj_records.join(old_db_test_adj_records.set_index('airtable_record_id'),
                                           on='airtable_record_id')

    # Concat the old and new airtable test adj records
    airtable_master_data = pd.concat([new_airtable_test_adj_records, old_airtable_test_adj_records])

    # Write any newly test adjusted records back to airtable
    # Only write back to airtable in prod! Don't want to mess up AT if devs make a mistake locally
    if app.config['WRITE_TO_AIRTABLE']:
        batch_update_airtable_records(new_airtable_test_adj_records,
                                      ['Adjusted serum positive prevalence',
                                       'Adjusted serum pos prevalence, 95pct CI Lower',
                                       'Adjusted serum pos prevalence, 95pct CI Upper',
                                       'Adjusted sensitivity',
                                       'Adjusted specificity',
                                       'Independent evaluation type'])
    return airtable_master_data
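# A minimal, self-contained sketch of the change-detection trick used in add_test_adjustments
# above: concatenate incoming and stored rows, round floats, then drop_duplicates(keep=False) so
# any row present in both frames vanishes, leaving only ids that are new or modified. The frame
# and column names below are illustrative only, not part of this module's real data model; assumes
# pandas is imported as pd.
def _demo_change_detection():
    incoming = pd.DataFrame({"id": [1, 2, 3], "value": [0.10, 0.20, 0.35]})
    stored = pd.DataFrame({"id": [1, 2], "value": [0.10, 0.25]})
    combined = pd.concat([incoming, stored]).round(5)
    # Rows that appear in both frames are dropped entirely; survivors are new or modified
    changed = combined.drop_duplicates(subset=["id", "value"], keep=False)
    return changed["id"].unique()  # array([2, 3]): id 2 was modified, id 3 is new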
def get_all_records(research_fields=False, include_disputed_regions=False, unity_aligned_only=False,
                    include_records_without_latlngs=False):
    with db_session() as session:
        # Get all records for now, join on all tables
        table_infos = db_model_config['supplementary_table_info']

        # Add columns from dashboard source to select statement
        fields_list = [DashboardSource.source_id]
        for field_string in dashboard_source_cols:
            fields_list.append(getattr(DashboardSource, field_string))

        # If research_fields is True, add columns from research source to select statement
        if research_fields:
            for col in research_source_cols:
                fields_list.append(getattr(ResearchSource, col))

        # Aliases for country name, iso3 code, income class, and HRP class
        fields_list.append(Country.country_name.label("country"))
        fields_list.append(Country.country_iso3.label("country_iso3"))
        fields_list.append(Country.income_class.label("income_class"))
        fields_list.append(Country.hrp_class.label("hrp_class"))

        # Will need to group by every field that isn't array aggregated
        # Using copy here since Python assigns list variables by reference instead of by value
        groupby_fields = fields_list.copy()

        for table_info in table_infos:
            # The label method returns an alias for the column being queried
            # Use case: we want to get fields from the bridge table without the _name suffix
            fields_list.append(_apply_agg_query(getattr(table_info["main_table"],
                                                        f"{table_info['entity']}_name"),
                                                table_info['entity']))

        query = session.query(*fields_list, _get_isotype_col_expression(label="isotypes_reported"))

        # There are entries that have multiple field values for a certain entity
        # e.g., an entry may be associated with two different age groups, "Youth (13-17)" and "Children (0-12)"
        # Gather up all of these rows
        for table_info in table_infos:
            bridge_table = table_info["bridge_table"]
            main_table = table_info["main_table"]
            entity = f"{table_info['entity']}_id"
            try:
                query = query.join(bridge_table, getattr(bridge_table, "source_id") == DashboardSource.source_id,
                                   isouter=True) \
                    .join(main_table, getattr(main_table, entity) == getattr(bridge_table, entity), isouter=True)
            except Exception as e:
                print(e)

        # Join on country table
        query = query.join(Country, Country.country_id == DashboardSource.country_id, isouter=True)

        # If research_fields is True, join to research source table
        if research_fields:
            query = query.join(ResearchSource, ResearchSource.source_id == DashboardSource.source_id)

        # Filter out estimates in disputed areas if necessary
        if not include_disputed_regions:
            query = query.filter(DashboardSource.in_disputed_area == False)

        # Filter out non Unity-aligned studies if necessary
        if unity_aligned_only:
            query = query.filter(DashboardSource.is_unity_aligned == True)

        # Filter out records without latlngs if necessary
        if not include_records_without_latlngs:
            query = query.filter(DashboardSource.pin_latitude.isnot(None)) \
                .filter(DashboardSource.pin_longitude.isnot(None))

        # Need to apply group by so that array_agg works as expected
        query = query.group_by(*groupby_fields)
        query = query.all()

        # Convert from sqlalchemy objects to dicts
        query_dict = [q._asdict() for q in query]
        return query_dict
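# Hypothetical usage sketch for get_all_records (illustrative only): fetch Unity-aligned
# estimates that have map pins, then inspect the country and aggregated isotype fields.
#
#     records = get_all_records(unity_aligned_only=True)
#     for record in records[:3]:
#         print(record["country"], record["isotypes_reported"])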