# Example 1
def ingest_sample_frame_goi_filter_options():
    """Fetch 'sample frame GOI' filter options from Airtable and store them.

    Pulls records from the Airtable options endpoint and bulk-inserts them as
    PopulationGroupOptions rows, all stamped with the same created_at time.
    On a malformed Airtable response (missing keys), delegates to
    handle_airtable_error instead of raising.
    """
    headers = {'Authorization': 'Bearer {}'.format(AIRTABLE_API_KEY)}
    data = airtable_get_request(AIRTABLE_SAMPLE_FRAME_GOI_OPTIONS_REQUEST_URL, headers)
    # Try to get records from data if the request was successful
    try:
        records = data["records"]
        # Single timestamp shared by the whole batch so the ingest run can be
        # identified later (e.g. by drop_table_entries)
        current_time = datetime.now()
        with db_session() as session:
            # 'Name' is required; the optional fields use dict.get so a
            # missing key simply becomes NULL in the database
            records_to_add = [
                PopulationGroupOptions(id=uuid4(),
                                       name=record['fields']['Name'],
                                       french_name=record['fields'].get('French Name'),
                                       german_name=record['fields'].get('German Name'),
                                       order=record['fields'].get('Order'),
                                       created_at=current_time)
                for record in records if record['fields']]
            session.bulk_save_objects(records_to_add)
            session.commit()
        return
    except KeyError as e:
        handle_airtable_error(e, data, AIRTABLE_SAMPLE_FRAME_GOI_OPTIONS_REQUEST_URL, headers)
        return
# Example 2
def get_all_filter_options() -> Dict[str, Any]:
    """Assemble every filter option exposed by the dashboard API.

    Combines the static options with distinct values pulled from the database
    (countries, states, cities, subgroup vars/cats, antibody targets), the
    min/max sampling and publication dates, and the population group options.

    Returns:
        Dict mapping each filter name to a sorted list of options, or an
        ISO-8601 date string for the date-bound entries.
    """
    with db_session() as session:
        def _distinct_sorted(column):
            # Distinct non-null values of `column`, sorted alphabetically.
            return sorted(row[0] for row in session.query(distinct(column))
                          if row[0] is not None)

        def _agg_isoformat(aggregate):
            # Scalar result of a min/max date aggregate as an ISO-8601 string.
            return session.query(aggregate)[0][0].isoformat()

        options = get_filter_static_options()

        options["country"] = _distinct_sorted(Country.country_name)
        options["genpop"] = _distinct_sorted(ResearchSource.genpop)
        options["subgroup_var"] = _distinct_sorted(DashboardSource.subgroup_var)
        options["subgroup_cat"] = _distinct_sorted(ResearchSource.subgroup_cat)
        options["state"] = _distinct_sorted(State.state_name)
        options["city"] = _distinct_sorted(City.city_name)

        # Only surface Spike and Nucleocapsid antibody target options because those
        # are the only options relevant for interpreting seroprev data in the
        # context of vaccines
        options["antibody_target"] = [t for t in _distinct_sorted(AntibodyTarget.antibody_target_name)
                                      if t in ['Spike', 'Nucleocapsid (N-protein)']]

        options["max_sampling_end_date"] = _agg_isoformat(func.max(DashboardSource.sampling_end_date))
        options["min_sampling_end_date"] = _agg_isoformat(func.min(DashboardSource.sampling_end_date))
        options["max_publication_end_date"] = _agg_isoformat(func.max(DashboardSource.publication_date))
        options["min_publication_end_date"] = _agg_isoformat(func.min(DashboardSource.publication_date))
        # NOTE(review): updated_at mirrors max_publication_end_date — presumably
        # intentional (dashboard "last updated" = latest publication date)
        options["updated_at"] = _agg_isoformat(func.max(DashboardSource.publication_date))

        # Population group options, ordered by their explicit Order column:
        # each row is (order, english name, french name, german name)
        results = session.query(PopulationGroupOptions.order, PopulationGroupOptions.name,
                                PopulationGroupOptions.french_name, PopulationGroupOptions.german_name)
        options["population_group"] = [
            {"english": name, "french": french, "german": german}
            for _, name, french, german in sorted(results, key=lambda row: row[0])]

        return options
# Example 3
 def get_table_counts(self):
     """Return a mapping of table name -> row count for all dashboard tables."""
     tables = (DashboardSource, ResearchSource, Country, State, City,
               TestManufacturer, AntibodyTarget, CityBridge, StateBridge,
               TestManufacturerBridge, AntibodyTargetBridge)
     counts = {}
     with db_session() as session:
         for model in tables:
             # Count over the primary-key column instead of using the
             # .count() method, for performance reasons
             pk_name = inspect(model).primary_key[0].key
             pk_column = getattr(model, pk_name)
             counts[model.__tablename__] = session.query(func.count(pk_column)).scalar()
     return counts
# Example 4
def drop_table_entries(current_time: datetime, drop_old: bool = True):
    """Delete rows from every ingest table based on their created_at stamp.

    Rows with a NULL created_at are always deleted.

    Args:
        current_time: Timestamp that marks the current ingest run.
        drop_old: If True, delete rows NOT stamped with current_time (purge
            stale data after a successful run). If False, delete rows stamped
            with current_time (roll back the current run).
    """
    # Iterate the table objects directly; the dict keys are not needed
    for table in table_names_dict.values():
        with db_session() as session:
            if drop_old:
                # Drop old records (anything not from this run)
                session.query(table).filter(or_(table.created_at != current_time, table.created_at.is_(None))).delete()
            else:
                # Drop new records (anything from this run)
                session.query(table).filter(or_(table.created_at == current_time, table.created_at.is_(None))).delete()
            session.commit()
    return
# Example 5
def get_record_details(source_id):
    """Fetch one source record with all supplementary entities joined in.

    Args:
        source_id: Primary key of the DashboardSource record to fetch.

    Returns:
        Dict of record fields (sampling dates ISO-formatted), or None if the
        query failed before producing a result.
    """
    # Initialized up front: previously, an exception raised before `result`
    # was assigned made `return result` raise UnboundLocalError, masking the
    # printed error.
    result = None
    with db_session() as session:
        try:
            # Case-when expression deriving a single isotype column from the
            # per-isotype boolean columns
            isotype_case_expression = _get_isotype_col_expression()

            # Build list of columns to select, starting with the airtable
            # source columns
            fields_list = [getattr(DashboardSource, col) for col in dashboard_source_cols]

            # Info about supplementary tables to join to the airtable source
            table_infos = db_model_config['supplementary_table_info']

            # Add each supplementary table's <entity>_name column, aliased to
            # the bare entity name, plus the country name
            for sup_table in table_infos:
                fields_list.append(
                    getattr(sup_table['main_table'], f"{sup_table['entity']}_name").label(sup_table['entity']))
            fields_list.append(getattr(Country, 'country_name').label('country'))

            query = session.query(*fields_list, isotype_case_expression)

            # Outer-join each bridge/main supplementary table pair to the
            # airtable source
            for sup_table in table_infos:
                main_table = sup_table['main_table']
                bridge_table = sup_table['bridge_table']
                entity_id = f"{sup_table['entity']}_id"
                query = query.outerjoin(bridge_table, bridge_table.source_id == DashboardSource.source_id)\
                    .outerjoin(main_table, getattr(bridge_table, entity_id) == getattr(main_table, entity_id))

            # Join on country table
            query = query.outerjoin(Country, Country.country_id == DashboardSource.country_id)

            # Filter by input source id and convert results to dicts
            query = query.filter(DashboardSource.source_id == source_id)
            result = [x._asdict() for x in query.all()]

            # The joins can yield multiple rows per source; collapse them
            # into a single record
            if len(result) > 1:
                result = _get_parsed_record(result)
            else:
                result = result[0]

            # Convert dates to use isoformat
            if result['sampling_end_date'] is not None:
                result['sampling_end_date'] = result['sampling_end_date'].isoformat()
            if result['sampling_start_date'] is not None:
                result['sampling_start_date'] = result['sampling_start_date'].isoformat()

        except Exception as e:
            # NOTE(review): deliberately best-effort — errors are printed and
            # None (or a partial result) is returned rather than raised
            print(e)
    return result
# Example 6
def add_test_adjustments(df: pd.DataFrame) -> pd.DataFrame:
    """Apply test adjustment only to new or modified airtable records.

    Diffs the incoming airtable dataframe against the records already in the
    database: rows whose test-adjustment inputs are unchanged reuse the
    adjusted values stored in the DB, while new/modified rows are re-adjusted
    via TestAdjHandler (an expensive computation). Newly adjusted values are
    written back to airtable when app.config['WRITE_TO_AIRTABLE'] is set.

    Args:
        df: Airtable source records, one row per estimate.

    Returns:
        pd.DataFrame: the input records augmented with adjusted-prevalence
        columns and a temporary boolean 'test_adjusted_record' column.
    """
    # Query record ids in our database
    with db_session() as session:
        total_db_records = session.query(DashboardSource.serum_pos_prevalence,
                                         DashboardSource.test_adj,
                                         DashboardSource.sensitivity,
                                         DashboardSource.specificity,
                                         DashboardSource.test_type,
                                         DashboardSource.denominator_value,
                                         DashboardSource.adj_prevalence,
                                         ResearchSource.ind_se,
                                         ResearchSource.ind_sp,
                                         ResearchSource.ind_se_n,
                                         ResearchSource.ind_sp_n,
                                         ResearchSource.se_n,
                                         ResearchSource.sp_n,
                                         ResearchSource.test_validation,
                                         ResearchSource.airtable_record_id) \
            .join(ResearchSource, ResearchSource.source_id == DashboardSource.source_id, isouter=True).all()
        total_db_records = [q._asdict() for q in total_db_records]
        total_db_records = pd.DataFrame(data=total_db_records)

    # Concat old and new records and fillna with 0 (NaN and None become 0 so it is standardized)
    diff = pd.concat([df, total_db_records])
    diff.fillna(0, inplace=True)

    # Convert numeric cols to float (some of these come out of airtable as strings so need to standardize types)
    float_cols = [
        'ind_se', 'ind_sp', 'ind_se_n', 'ind_sp_n', 'se_n', 'sp_n',
        'sensitivity', 'specificity', 'denominator_value',
        'serum_pos_prevalence'
    ]
    diff[float_cols] = diff[float_cols].astype(float)

    # Round float columns to a consistent number of decimal places to ensure consistent float comparisons
    diff[float_cols] = diff[float_cols].round(5)

    # Drop duplicates based on these cols. keep=False removes BOTH copies of a
    # matching airtable/DB pair, so only unmatched (new or modified) rows survive
    duplicate_cols = [
        'airtable_record_id', 'test_adj', 'ind_se', 'ind_sp', 'ind_se_n',
        'ind_sp_n', 'se_n', 'sp_n', 'sensitivity', 'specificity',
        'test_validation', 'test_type', 'denominator_value',
        'serum_pos_prevalence'
    ]
    diff = diff.drop_duplicates(subset=duplicate_cols, keep=False)

    # Get all unique airtable_record_ids that are new/have been modified
    new_airtable_record_ids = diff['airtable_record_id'].unique()

    # Get all rows from airtable data that need to be test adjusted, and ones that don't
    old_airtable_test_adj_records = \
        df[~df['airtable_record_id'].isin(new_airtable_record_ids)].reset_index(
            drop=True)
    new_airtable_test_adj_records = \
        df[df['airtable_record_id'].isin(new_airtable_record_ids)].reset_index(
            drop=True)

    # Add temporary boolean column if record will be test adjusted or not
    old_airtable_test_adj_records['test_adjusted_record'] = False
    new_airtable_test_adj_records['test_adjusted_record'] = True

    # Only proceed with test adjustment if there are new unadjusted records
    if not new_airtable_test_adj_records.empty:
        # Apply test adjustment to the new_test_adj_records and add 6 new columns
        test_adj_handler = TestAdjHandler()
        # get_adjusted_estimate returns a 6-tuple per row; zip(*...) transposes
        # the row-wise tuples into the 6 new columns
        new_airtable_test_adj_records['adj_prevalence'], \
        new_airtable_test_adj_records['adj_sensitivity'], \
        new_airtable_test_adj_records['adj_specificity'], \
        new_airtable_test_adj_records['ind_eval_type'], \
        new_airtable_test_adj_records['adj_prev_ci_lower'], \
        new_airtable_test_adj_records['adj_prev_ci_upper'] = \
            zip(*new_airtable_test_adj_records.apply(
                lambda x: test_adj_handler.get_adjusted_estimate(test_adj=x['test_adj'],
                                                                 ind_se=x['ind_se'],
                                                                 ind_sp=x['ind_sp'],
                                                                 ind_se_n=x['ind_se_n'],
                                                                 ind_sp_n=x['ind_sp_n'],
                                                                 se_n=x['se_n'],
                                                                 sp_n=x['sp_n'],
                                                                 sensitivity=x['sensitivity'],
                                                                 specificity=x['specificity'],
                                                                 test_validation=x['test_validation'],
                                                                 test_type=x['test_type'],
                                                                 denominator_value=x['denominator_value'],
                                                                 serum_pos_prevalence=x['serum_pos_prevalence']),
                axis=1))

    # If there are no old test adjusted records, just return the new ones
    if old_airtable_test_adj_records.empty:
        return new_airtable_test_adj_records

    # Add test adjustment data to old_test_adj_records from database
    old_airtable_record_ids = old_airtable_test_adj_records[
        'airtable_record_id'].unique()

    # Query record ids in our database
    with db_session() as session:
        old_db_test_adj_records = session.query(DashboardSource.adj_prevalence,
                                                DashboardSource.adj_prev_ci_lower,
                                                DashboardSource.adj_prev_ci_upper,
                                                ResearchSource.adj_sensitivity,
                                                ResearchSource.adj_specificity,
                                                ResearchSource.ind_eval_type,
                                                ResearchSource.airtable_record_id) \
            .join(ResearchSource, ResearchSource.source_id == DashboardSource.source_id, isouter=True) \
            .filter(ResearchSource.airtable_record_id.in_(old_airtable_record_ids)).all()
        old_db_test_adj_records = [
            q._asdict() for q in old_db_test_adj_records
        ]
        old_db_test_adj_records = pd.DataFrame(data=old_db_test_adj_records)

    # Drop the test adjustment data that is currently in airtable, and keep the one in the DB
    old_airtable_test_adj_records = old_airtable_test_adj_records.drop(
        columns=[
            'adj_prevalence', 'adj_prev_ci_lower', 'adj_prev_ci_upper',
            'adj_sensitivity', 'adj_specificity', 'ind_eval_type'
        ])

    # Join old_airtable_test_adj_records with old_db_adjusted_records
    old_airtable_test_adj_records = \
        old_airtable_test_adj_records.join(old_db_test_adj_records.set_index('airtable_record_id'),
                                           on='airtable_record_id')

    # Concat the old and new airtable test adj records
    airtable_master_data = pd.concat(
        [new_airtable_test_adj_records, old_airtable_test_adj_records])

    # Write any newly test adjusted records back to airtable
    # Only want to write back to airtable in prod! Don't want to mess up AT if devs make a mistake on local
    if app.config['WRITE_TO_AIRTABLE']:
        batch_update_airtable_records(new_airtable_test_adj_records, [
            'Adjusted serum positive prevalence',
            'Adjusted serum pos prevalence, 95pct CI Lower',
            'Adjusted serum pos prevalence, 95pct CI Upper',
            'Adjusted sensitivity', 'Adjusted specificity',
            'Independent evaluation type'
        ])
    return airtable_master_data
# Example 7
def get_all_records(research_fields=False, include_disputed_regions=False,
                    unity_aligned_only=False, include_records_without_latlngs=False):
    """Query all estimate records, joined with their supplementary entities.

    Args:
        research_fields: Also select research-source columns (inner-joins
            ResearchSource).
        include_disputed_regions: Keep estimates located in disputed areas.
        unity_aligned_only: Keep only Unity-aligned studies.
        include_records_without_latlngs: Keep records lacking pin coordinates.

    Returns:
        List of dicts, one per record, with multi-valued entities aggregated.
    """
    with db_session() as session:
        supplementary_tables = db_model_config['supplementary_table_info']

        # Columns selected from the dashboard source table
        select_columns = [DashboardSource.source_id]
        select_columns += [getattr(DashboardSource, name) for name in dashboard_source_cols]

        # Optionally include the research source columns
        if research_fields:
            select_columns += [getattr(ResearchSource, name) for name in research_source_cols]

        # Aliased country-level columns
        select_columns.extend([
            Country.country_name.label("country"),
            Country.country_iso3.label("country_iso3"),
            Country.income_class.label("income_class"),
            Country.hrp_class.label("hrp_class"),
        ])

        # Every non-aggregated column must appear in the GROUP BY; snapshot
        # the list (by value) before the aggregated columns are appended.
        group_columns = select_columns.copy()

        for info in supplementary_tables:
            # Aggregate the entity's _name column under the bare entity alias
            name_column = getattr(info["main_table"], f"{info['entity']}_name")
            select_columns.append(_apply_agg_query(name_column, info['entity']))

        query = session.query(*select_columns, _get_isotype_col_expression(label="isotypes_reported"))

        # Outer-join each bridge/main table pair so records associated with
        # several entity values (e.g. multiple age groups) are gathered up.
        for info in supplementary_tables:
            bridge_table = info["bridge_table"]
            main_table = info["main_table"]
            id_field = f"{info['entity']}_id"
            try:
                query = query.join(bridge_table, getattr(bridge_table, "source_id") ==
                                   DashboardSource.source_id, isouter=True) \
                    .join(main_table, getattr(main_table, id_field) == getattr(bridge_table, id_field), isouter=True)
            except Exception as e:
                print(e)

        # Join on the country table
        query = query.join(Country, Country.country_id == DashboardSource.country_id, isouter=True)

        if research_fields:
            query = query.join(ResearchSource, ResearchSource.source_id == DashboardSource.source_id)

        # Optional row filters
        if not include_disputed_regions:
            query = query.filter(DashboardSource.in_disputed_area == False)

        if unity_aligned_only:
            query = query.filter(DashboardSource.is_unity_aligned == True)

        if not include_records_without_latlngs:
            query = query.filter(DashboardSource.pin_latitude.isnot(None)). \
                filter(DashboardSource.pin_longitude.isnot(None))

        # Group by so that array_agg works as expected
        query = query.group_by(*group_columns)

        # Convert from sqlalchemy rows to plain dicts
        return [row._asdict() for row in query.all()]