示例#1
0
def test_get_testing_df_by_state():
    """Check the MA testing timeseries has both test-count columns and that they grow.

    Fetches the state timeseries for "MA", verifies the positive and negative
    test columns exist, then indexes the frame by parsed date and asserts each
    count is positive on 2020-04-10 and strictly larger on 2020-05-01.
    """
    early_date = "2020-04-10"
    later_date = "2020-05-01"
    test_fields = (
        CovidTrackingDataSource.Fields.POSITIVE_TESTS,
        CovidTrackingDataSource.Fields.NEGATIVE_TESTS,
    )

    df = build_processed_dataset.get_testing_timeseries_by_state("MA")
    for field in test_fields:
        assert field in df.columns

    # TODO(tom): once build_processed_dataset hands back a real datetime
    # dtype, this string parsing can be dropped.
    parsed_dates = pd.to_datetime(df[CommonFields.DATE], format="%m/%d/%y")
    df[CommonFields.DATE] = parsed_dates
    df.set_index(CommonFields.DATE, inplace=True)

    # Per the original note: the only state-level testing timeseries source is
    # CDSDataset, which is NaN for MA until April 6 -- hence the April 10 start.
    for field in test_fields:
        assert 0 < df.at[early_date, field] < df.at[later_date, field]
示例#2
0
def generate_state_timeseries(
    projection_row, intervention, input_dir
) -> CovidActNowStateTimeseries:
    """Build the CovidActNowStateTimeseries API object for one state.

    Args:
        projection_row: Row of projection results; read via the ``rc`` column
            keys (state full name, FIPS, latitude, longitude, last updated).
        intervention: Intervention passed through to the raw-data loader; its
            ``name`` is used in the error message.
        input_dir: Directory passed through to the raw-data loader.

    Returns:
        A CovidActNowStateTimeseries combining projections, the per-day
        timeseries rows, and the latest/timeseries actuals for the state.

    Raises:
        Exception: If no timeseries rows were produced for the state.
    """
    state_name = projection_row[rc.STATE_FULL_NAME]
    state = US_STATE_ABBREV[state_name]
    fips = projection_row[rc.FIPS]
    raw_dataseries = get_can_projection.get_can_raw_data(
        input_dir, state, fips, AggregationLevel.STATE, intervention
    )

    # Attach state testing data to the raw series by date. A left join keeps
    # every raw row even when testing rows are absent for a date (the original
    # note cites NE as a state with missing testing data).
    testing_df = get_testing_timeseries_by_state(state)
    raw_frame = pd.DataFrame(raw_dataseries)
    merged_frame = raw_frame.merge(testing_df, on="date", how="left")
    merged_records = merged_frame.to_dict(orient="records")

    timeseries = [
        _generate_state_timeseries_row(record) for record in merged_records
    ]

    projections = _generate_api_for_projections(projection_row)
    if not timeseries:
        raise Exception(f"State time series empty for {intervention.name}")

    state_intervention = get_can_projection.get_intervention_for_state(state)
    actuals_ts = combined_datasets.build_us_timeseries_with_all_fields()
    actual_latest = combined_datasets.build_us_latest_with_all_fields()
    state_latest = actual_latest.get_record_for_state(state)

    # Evaluate each field in the same order the constructor receives them.
    population = state_latest[CommonFields.POPULATION]
    latitude = projection_row[rc.LATITUDE]
    longitude = projection_row[rc.LONGITUDE]
    actuals = _generate_actuals(state_latest, state_intervention)
    full_name = projection_row[rc.STATE_FULL_NAME]
    row_fips = projection_row[rc.FIPS]
    last_updated = _format_date(projection_row[rc.LAST_UPDATED])
    state_records = actuals_ts.get_records_for_state(state)
    actuals_timeseries = _generate_actuals_timeseries(
        state_records, state_intervention
    )

    return CovidActNowStateTimeseries(
        population=population,
        lat=latitude,
        long=longitude,
        actuals=actuals,
        stateName=full_name,
        fips=row_fips,
        lastUpdatedDate=last_updated,
        projections=projections,
        timeseries=timeseries,
        actuals_timeseries=actuals_timeseries,
    )
示例#3
0
def test_get_testing_df():
    """Smoke test: the MA testing timeseries lookup returns something."""
    result = build_processed_dataset.get_testing_timeseries_by_state('MA')
    assert result is not None