def test_compare_counties(data_clean, gdf_processed):
    """Smoke-test ``compare_counties`` on deaths data for Greene and its neighbors.

    No assertions are made; the comparison result is printed for manual
    inspection.
    """
    region = ["Greene", *get_neighbors("Greene", gdf_processed)]
    comparison = compare_counties(
        data_clean["deaths"],
        clean_rules=DATA_INDEX["deaths"]["clean_rules"],
        compare_field="moving_avg_per_capita",
        counties=region,
    )
    print("\n", comparison)
def test_process_neighbors_per_capita(data_clean, gdf_processed):
    """Smoke-test the per-capita moving-average comparison for Dauphin's region.

    No assertions are made; the date-indexed result is printed for manual
    inspection.
    """
    region = ["Dauphin", *get_neighbors("Dauphin", gdf_processed)]
    comparison = compare_counties(
        data_clean["cases"],
        clean_rules=DATA_INDEX["cases"]["clean_rules"],
        compare_field="moving_avg_per_capita",
        counties=region,
    )
    by_date = comparison.set_index("date")
    print(by_date)
def greene_region_deaths_moving_avg_per_cap(data_clean, gdf_processed) -> pd.DataFrame:
    """
    A DataFrame comparing the per-capita moving average of deaths for Greene
    and its neighboring counties.

    Args:
        data_clean: Mapping of data type to cleaned data; only the "deaths"
            entry is read here.
        gdf_processed: Geodataframe passed to ``get_neighbors`` to look up
            Greene's neighboring counties.

    Returns:
        pd.DataFrame: Whatever ``compare_counties`` produces for the
        "moving_avg_per_capita" field across Greene and its neighbors.
    """
    neighbors = get_neighbors("Greene", gdf_processed)
    compare_list = ["Greene"] + neighbors
    return compare_counties(
        data_clean["deaths"],
        clean_rules=DATA_INDEX["deaths"]["clean_rules"],
        compare_field="moving_avg_per_capita",
        counties=compare_list,
    )
def test_process_neighbors_total(data_clean, gdf_processed):
    """Check known cumulative case totals for two counties in Dauphin's region.

    Compares the "total" field across Dauphin and its neighbors, then spot
    checks one value each from the "lebanon" and "cumberland" columns.
    """
    neighbors = get_neighbors("Dauphin", gdf_processed)
    compare_list = ["Dauphin"] + neighbors
    data_clean_cases = data_clean["cases"]
    clean_rules = DATA_INDEX["cases"]["clean_rules"]
    df = compare_counties(
        data_clean_cases,
        clean_rules=clean_rules,
        compare_field="total",
        counties=compare_list,
    )
    df = df.set_index("date")
    cases_lebanon_july_26 = df.at["2020-07-26", "lebanon"]
    # This value comes from the "cumberland" column, so name it accordingly;
    # the previous name suggested it was a Lebanon figure.
    cases_cumberland_july_23 = df.at["2020-07-23", "cumberland"]
    assert cases_lebanon_july_26 == 1544
    assert cases_cumberland_july_23 == 1066
def gen_desc_neighbors(self, *, data_type: str) -> str:
    """Describe how this county's per capita rate ranks among its neighbors.

    Args:
        data_type (str): Type of data being described. It is used to build the
            ranking column name ``{data_type}_added_past_two_weeks_per_capita``
            and is interpolated into the returned text.

    Returns:
        str: Descriptive text with ``[b]...[/b]`` markup around the ranking
        (and around the tie count, when a tie sentence is included).
    """
    county = self.county_name_clean
    neighbors = get_neighbors(county, self.gdf)
    total_neighbors = len(neighbors)

    # Restrict the geodataframe to this county plus its neighbors before ranking.
    in_region = self.gdf["NAME"].isin([county] + neighbors)
    ranking = self.gdf_get_ranking(
        self.gdf[in_region], f"{data_type}_added_past_two_weeks_per_capita"
    )
    from_top, from_bottom, tied_count = ranking
    rank_label = rank_text(from_top, from_bottom)

    # Mention ties only when some, but not all, of the neighbors share the rank.
    sentence_frag = ""
    if tied_count > 0 and tied_count != total_neighbors:
        sentence_frag = (
            f"[b]{tied_count}[/b] other "
            f"{p.singular_noun('counties', tied_count)} had the same per capita rate of"
            f" {data_type}."
        )

    return (
        f"Compared to its {p.number_to_words(total_neighbors)} neighboring counties, {county} "
        f"County had the [b]{rank_label}[/b] number of {data_type} per 100,000 people over "
        f"the past two weeks. {sentence_frag} Here's how {county}'s per capita 7-day moving average compares to "
        f"its neighbors:"
    )
def gen_chart(
    county_name_clean: str,
    data_type: str,
    *,
    data_index: Dict,
    chart_dict: Dict,
    data_clean: Dict,
    county_data: Dict[str, pd.DataFrame],
    gdf: geopandas.GeoDataFrame,
    primary_color: str,
    secondary_color: str,
    aws_bucket: str,
    aws_dir: str,
) -> Dict[str, Union[Union[str, None, List[Dict[str, str]]], Any]]:
    """
    Creates a chart PNG using Altair and moves it to s3. Returns a URL to the
    image, a legend for the chart (if any), a title and descriptive text.

    Args:
        county_name_clean (str): Name of county without 'County' suffix. Eg. "Dauphin"
        data_type (str): Type of data. Eg. "cases".
        data_index (Dict): Config settings for data.
        chart_dict (Dict): Config settings for chart. Must contain "type"; other
            keys ("color_field", "legend_title", "compare_field", "custom_legend",
            "title") are read depending on the chart type.
        data_clean (Dict[str, pd.DataFrame]): Dict of pandas dfs of cases, deaths,
            tests data for all Pa. counties that has had some minimal cleaning.
        county_data (Dict[str, pd.DataFrame]): Processed cases, deaths, tests, etc
            data for a specific county.
        gdf (geopandas.GeoDataFrame): Pa geodataframe with cases, deaths, tests data
            merged on to it.
        primary_color (str): Hex code for color theme.
        secondary_color (str): Hex code for color theme.
        aws_bucket (str): AWS bucket where charts will be uploaded to.
        aws_dir (str): Directory within AWS bucket where charts will be uploaded.

    Returns:
        Dict[str, Union[Union[str, None, List[Dict[str, str]]], Any]]: Dict with the
        keys "title", "custom_legend", "image_path" and "description".

    Raises:
        ValueError: If ``chart_dict["type"]`` does not match any supported chart type.
    """
    chart_type = chart_dict["type"]
    custom_legend = None
    fmt = "png"
    content_type = "image/png"
    gen_desc = GenStats(county_name_clean, gdf=gdf)

    # Chart types are matched by substring, so the first matching branch wins.
    if "daily_and_avg" in chart_type:
        chart = chart_bar_and_line(
            data_type=data_type,
            df=county_data[data_type],
            line_color=primary_color,
            bar_color=secondary_color,
        )
        custom_legend = chart_dict.get("custom_legend")
        chart_desc = gen_desc.gen_desc_daily(data_type=data_type)

    elif "choropleth" in chart_type:
        chart = map_choropleth(
            gdf,
            color_field=chart_dict["color_field"],
            highlight_polygon=county_name_clean,
            min_color=secondary_color,
            max_color=primary_color,
            legend_title=chart_dict["legend_title"],
        )
        chart_desc = gen_desc.gen_desc_choro(data_type=data_type)

    # NOTE(review): "neigbhors" is misspelled, but this literal is matched against
    # the configured chart type; confirm the config before correcting it here.
    elif "neigbhors_per_capita" in chart_type:
        compare_field = chart_dict["compare_field"]
        neighbors = get_neighbors(county_name_clean, gdf)
        neighbors = sort_counties_by_pop(neighbors)
        # The selected county always comes first, followed by its sorted neighbors.
        compare_list = [county_name_clean] + neighbors
        df_multi_county = compare_counties(
            data_clean[data_type],
            clean_rules=data_index[data_type]["clean_rules"],
            compare_field=compare_field,
            counties=compare_list,
        )
        # Every column except "date" is stacked into long form for faceting.
        county_cols = list(df_multi_county.columns)
        county_cols.remove("date")
        df_multi_county = stack_df(
            df_multi_county, stack_cols=county_cols, x_axis_col="date"
        )
        chart = chart_faceted(
            df_multi_county,
            category_col="category",
            x_axis_col="date",
            y_axis_col="value",
            line_color=primary_color,
        )
        chart_desc = gen_desc.gen_desc_neighbors(data_type=data_type)

    elif "stacked_area" in chart_type:
        df = process_cumulative_tests(county_data["confirmed"], county_data["tests"])
        chart = chart_stacked_area(
            df,
            x_axis_col="date",
            y_axis_col="count",
            category_col="data_type",
            domain=["positive", "negative"],
            range_=[primary_color, secondary_color],
        )
        custom_legend = chart_dict.get("custom_legend")
        chart_desc = gen_desc.gen_desc_area_tests()

    else:
        # ValueError is still caught by callers handling Exception, and the
        # message now names the offending value.
        raise ValueError(
            f"Chart type {chart_type!r} not found. "
            "Did you provide a valid chart type in chart_index?"
        )

    image_filename = f"{county_name_clean.lower()}_{data_type}_{chart_type}.{fmt}"
    image_path = DIR_OUTPUT / image_filename
    save(chart, str(image_path))
    logging.info("...saved")

    # Move to s3
    copy_to_s3(image_path, aws_bucket, aws_dir, content_type=content_type)

    return {
        "title": chart_dict.get("title", "").upper(),
        "custom_legend": custom_legend,
        "image_path": f"https://{aws_bucket}/{aws_dir}/{image_filename}",
        "description": chart_desc,
    }
def test_get_neighbors(gdf_processed):
    """Check that Dauphin's neighbors include Lebanon and exclude Allegheny."""
    dauphin_neighbors = get_neighbors("Dauphin", gdf_processed)

    assert "Lebanon" in dauphin_neighbors
    assert "Allegheny" not in dauphin_neighbors

    print(dauphin_neighbors)