Пример #1
0
def get_usa_state_shapefile(shp, shx, dbf):
    """Produce the USA per-state shapefile through a writer on the given targets.

    Builds a ``shapefile.Writer`` on ``shp``/``shx``/``dbf``, opens the
    ``tl_2019_us_state`` shapefile located under the public data path, and
    hands both, together with the frame from ``get_usa_by_states_df()``, to
    ``join_and_output_shapefile``.

    Args:
        shp: Passed to ``shapefile.Writer`` as its ``shp`` target.
        shx: Passed to ``shapefile.Writer`` as its ``shx`` target.
        dbf: Passed to ``shapefile.Writer`` as its ``dbf`` target.
    """
    shp_writer = shapefile.Writer(shp=shp, shx=shx, dbf=dbf)
    public_data_url = get_public_data_base_url()
    public_data_path = _file_uri_to_path(public_data_url)
    states_df = get_usa_by_states_df()
    # Open the source shapefile in a ``with`` block so its file handles are
    # released deterministically once the join is done, even if it raises.
    with shapefile.Reader(
        f'{public_data_path}/data/shapefiles-uscensus/tl_2019_us_state'
    ) as shp_reader:
        # 'STATEFP' / 'State/County FIPS Code' are presumably the shapefile
        # field and frame column the join is keyed on -- see
        # join_and_output_shapefile for the exact contract.
        join_and_output_shapefile(
            states_df, shp_reader,
            'STATEFP', 'State/County FIPS Code', shp_writer)
Пример #2
0
def get_usa_by_county_df():
    """Load the JHU CSSE daily report for ``latest`` and return its US rows.

    The CSV is read from
    ``<public data base url>/data/cases-jhu/csse_covid_19_daily_reports/``
    using the ``latest`` date (defined outside this function) formatted as
    ``MM-DD-YYYY``. JHU column names are renamed, only rows whose
    ``Country/Region`` equals ``"US"`` are kept, and the frame is restricted
    to ``output_cols``.

    Returns:
        pandas.DataFrame: US rows with ``Last Update`` rendered as text,
        missing values replaced by ``NULL_VALUE`` and the index named
        ``OBJECTID``.

    Raises:
        AssertionError: If any ``Combined Key`` value occurs more than once.
    """
    url = '{}/data/cases-jhu/csse_covid_19_daily_reports/{}.csv'.format(
        get_public_data_base_url(), latest.strftime("%m-%d-%Y"))
    raw_df = pd.read_csv(url, dtype={"FIPS": str})

    # Zero-pad FIPS codes to five characters, but keep rows without a FIPS
    # code missing: ``astype(str)`` alone turns NaN into the text "nan"
    # (padded to "00nan"), which ``fillna(NULL_VALUE)`` below would never
    # replace.
    fips = raw_df['FIPS']
    raw_df['FIPS'] = fips.astype(str).str.zfill(5).where(fips.notna())

    column_mapping = {
        "Province_State": "Province/State",
        "Country_Region": "Country/Region",
        "Last_Update": "Last Update",
        "Lat": "Latitude",
        "Long_": "Longitude",
        "Combined_Key": "Combined Key",
        "Admin2": "County",
        "FIPS": "State/County FIPS Code",
    }
    remapped_df = raw_df.rename(columns=column_mapping)

    # USA only
    us_df = remapped_df[remapped_df["Country/Region"] == "US"]

    final_df = pd.DataFrame(us_df, columns=output_cols)
    final_df['Last Update'] = pd.to_datetime(final_df['Last Update'])
    # NOTE(review): '%-m' / '%-d' (no zero padding) are platform-specific
    # strftime directives -- confirm the deployment platform supports them.
    final_df['Last Update'] = final_df['Last Update'].dt.strftime(
        '%-m/%-d/%Y %H:%M')

    final_df['County'] = final_df['County'].replace(county_replace_with_null)
    # Literal (non-regex) removal of the prefix; made explicit so the result
    # does not depend on the pandas default for ``regex``.
    final_df['Combined Key'] = final_df['Combined Key'].str.replace(
        'Unassigned, ', '', regex=False)
    final_df = final_df.fillna(NULL_VALUE)

    final_df.index.name = 'OBJECTID'
    # assert unique key test
    assert final_df['Combined Key'].value_counts().max() == 1, \
        'Combined Key values are not unique'

    return final_df
Пример #3
0
def get_usa_county_shapefile(shp, shx, dbf):
    """Produce the USA per-county shapefile through a writer on the given targets.

    Builds a ``shapefile.Writer`` on ``shp``/``shx``/``dbf``, opens the
    ``tl_2019_us_county`` shapefile located under the public data path, and
    hands both, together with the frame from
    ``get_usa_by_county_with_projection_df()``, to
    ``join_and_output_shapefile``.

    Args:
        shp: Passed to ``shapefile.Writer`` as its ``shp`` target.
        shx: Passed to ``shapefile.Writer`` as its ``shx`` target.
        dbf: Passed to ``shapefile.Writer`` as its ``dbf`` target.
    """
    shp_writer = shapefile.Writer(shp=shp, shx=shx, dbf=dbf)
    public_data_url = get_public_data_base_url()
    public_data_path = _file_uri_to_path(public_data_url)

    county_df = get_usa_by_county_with_projection_df()
    # Open the source shapefile in a ``with`` block so its file handles are
    # released deterministically once the join is done, even if it raises.
    with shapefile.Reader(
        f'{public_data_path}/data/shapefiles-uscensus/tl_2019_us_county'
    ) as shp_reader:
        # 'GEOID' / 'State/County FIPS Code' are presumably the shapefile
        # field and frame column the join is keyed on -- see
        # join_and_output_shapefile for the exact contract.
        join_and_output_shapefile(
            county_df, shp_reader,
            'GEOID', 'State/County FIPS Code', shp_writer)
def _get_usa_by_county_df():
    """Load the JHU CSSE daily report for ``latest`` and return its US rows.

    The CSV is read from
    ``<public data base url>/data/cases-jhu/csse_covid_19_daily_reports/``
    using the ``latest`` date (defined outside this function) formatted as
    ``MM-DD-YYYY``. JHU column names are renamed, only rows whose
    ``Country/Region`` equals ``"US"`` are kept, and rows are de-duplicated
    on ``State/County FIPS Code``.

    Returns:
        pandas.DataFrame: US rows limited to the JHU columns listed below,
        with ``Last Update`` rendered as text, missing values replaced by
        ``NULL_VALUE`` and the index named ``OBJECTID``.

    Raises:
        AssertionError: If ``Combined Key`` or ``State/County FIPS Code``
            still contains a repeated value after de-duplication.
    """
    # TODO: read this from a dataset class
    url = "{}/data/cases-jhu/csse_covid_19_daily_reports/{}.csv".format(
        get_public_data_base_url(), latest.strftime("%m-%d-%Y"))
    raw_df = pd.read_csv(url, dtype={"FIPS": str})

    # Zero-pad FIPS codes to five characters, but keep rows without a FIPS
    # code missing: ``astype(str)`` alone turns NaN into the text "nan"
    # (padded to "00nan"), which ``fillna(NULL_VALUE)`` below would never
    # replace.
    fips = raw_df["FIPS"]
    raw_df["FIPS"] = fips.astype(str).str.zfill(5).where(fips.notna())

    column_mapping = {
        "Province_State": "Province/State",
        "Country_Region": "Country/Region",
        "Last_Update": "Last Update",
        "Lat": "Latitude",
        "Long_": "Longitude",
        "Combined_Key": "Combined Key",
        "Admin2": "County",
        "FIPS": "State/County FIPS Code",
    }
    remapped_df = raw_df.rename(columns=column_mapping)

    # USA only
    us_df = remapped_df[remapped_df["Country/Region"] == "US"]
    jhu_column_names = [
        "Province/State",
        "Country/Region",
        "Last Update",
        "Latitude",
        "Longitude",
        "Confirmed",
        "Recovered",
        "Deaths",
        "Active",
        "County",
        "State/County FIPS Code",
        "Combined Key",
        # Incident rate and people tested do not seem to be available yet
        # "Incident Rate",
        # "People Tested",
    ]
    final_df = pd.DataFrame(us_df, columns=jhu_column_names)
    final_df["Last Update"] = pd.to_datetime(final_df["Last Update"])
    # NOTE(review): "%-m" / "%-d" (no zero padding) are platform-specific
    # strftime directives -- confirm the deployment platform supports them.
    final_df["Last Update"] = final_df["Last Update"].dt.strftime(
        "%-m/%-d/%Y %H:%M")

    final_df["County"] = final_df["County"].replace(county_replace_with_null)
    # Literal (non-regex) removal of the prefix; made explicit so the result
    # does not depend on the pandas default for ``regex``.
    final_df["Combined Key"] = final_df["Combined Key"].str.replace(
        "Unassigned, ", "", regex=False)
    final_df = final_df.fillna(NULL_VALUE)
    final_df = final_df.drop_duplicates(
        "State/County FIPS Code"
    )  # note this is a hack, 49053 is dupped in JHU data :(
    final_df.index.name = "OBJECTID"
    # assert unique key test
    assert final_df["Combined Key"].value_counts().max() == 1, \
        "Combined Key values are not unique"
    assert final_df["State/County FIPS Code"].value_counts().max() == 1, \
        "State/County FIPS Code values are not unique"

    return final_df
Пример #5
0
def get_usa_state_shapefile(use_state_df, shp, shx, dbf):
    """Produce the USA per-state shapefile for ``use_state_df``.

    Builds a ``shapefile.Writer`` on ``shp``/``shx``/``dbf``, opens the
    ``tl_2019_us_state`` shapefile located under the public data path, and
    hands both, together with ``use_state_df``, to
    ``join_and_output_shapefile``.

    Args:
        use_state_df: Frame forwarded unchanged to
            ``join_and_output_shapefile``.
        shp: Passed to ``shapefile.Writer`` as its ``shp`` target.
        shx: Passed to ``shapefile.Writer`` as its ``shx`` target.
        dbf: Passed to ``shapefile.Writer`` as its ``dbf`` target.
    """
    shp_writer = shapefile.Writer(shp=shp, shx=shx, dbf=dbf)
    public_data_url = get_public_data_base_url()
    public_data_path = _file_uri_to_path(public_data_url)
    # Open the source shapefile in a ``with`` block so its file handles are
    # released deterministically once the join is done, even if it raises.
    with shapefile.Reader(
        f"{public_data_path}/data/shapefiles-uscensus/tl_2019_us_state"
    ) as shp_reader:
        # "STATEFP" / "State/County FIPS Code" are presumably the shapefile
        # field and frame column the join is keyed on -- see
        # join_and_output_shapefile for the exact contract.
        join_and_output_shapefile(
            use_state_df,
            shp_reader,
            "STATEFP",
            "State/County FIPS Code",
            shp_writer,
        )
Пример #6
0
def get_usa_state_shapefile(
        use_state_df) -> Tuple[io.BytesIO, io.BytesIO, io.BytesIO]:
    """Produce the USA per-state shapefile for ``use_state_df`` in memory.

    Creates three ``io.BytesIO`` buffers, builds a ``shapefile.Writer`` on
    them, opens the ``tl_2019_us_state`` shapefile located under the public
    data path, and hands reader, writer and ``use_state_df`` to
    ``join_and_output_shapefile``.

    Args:
        use_state_df: Frame forwarded unchanged to
            ``join_and_output_shapefile``.

    Returns:
        The ``(shp, shx, dbf)`` buffers the writer was created on.
    """
    shp = io.BytesIO()
    shx = io.BytesIO()
    dbf = io.BytesIO()

    # NOTE(review): the writer is not closed here; presumably
    # join_and_output_shapefile finalises it -- confirm.
    shp_writer = shapefile.Writer(shp=shp, shx=shx, dbf=dbf)
    public_data_url = get_public_data_base_url()
    public_data_path = _file_uri_to_path(public_data_url)
    # Open the source shapefile in a ``with`` block so its file handles are
    # released deterministically once the join is done, even if it raises.
    with shapefile.Reader(
        f"{public_data_path}/data/shapefiles-uscensus/tl_2019_us_state"
    ) as shp_reader:
        join_and_output_shapefile(
            use_state_df,
            shp_reader,
            "STATEFP",
            CommonFields.FIPS,
            shp_writer,
        )
    return shp, shx, dbf
Пример #7
0
def get_usa_county_shapefile(
        county_df) -> Tuple[io.BytesIO, io.BytesIO, io.BytesIO]:
    """Produce the USA per-county shapefile for ``county_df`` in memory.

    Creates three ``io.BytesIO`` buffers, builds a ``shapefile.Writer`` on
    them, opens the ``tl_2019_us_county`` shapefile located under the public
    data path, and hands reader, writer and ``county_df`` to
    ``join_and_output_shapefile``.

    Args:
        county_df: Frame forwarded unchanged to
            ``join_and_output_shapefile``.

    Returns:
        The ``(shp, shx, dbf)`` buffers the writer was created on.
    """
    shp = io.BytesIO()
    shx = io.BytesIO()
    dbf = io.BytesIO()

    # NOTE(review): the writer is not closed here; presumably
    # join_and_output_shapefile finalises it -- confirm.
    shp_writer = shapefile.Writer(shp=shp, shx=shx, dbf=dbf)
    public_data_url = get_public_data_base_url()
    public_data_path = _file_uri_to_path(public_data_url)

    # Open the source shapefile in a ``with`` block so its file handles are
    # released deterministically once the join is done, even if it raises.
    with shapefile.Reader(
        f"{public_data_path}/data/shapefiles-uscensus/tl_2019_us_county"
    ) as shp_reader:
        join_and_output_shapefile(
            county_df,
            shp_reader,
            "GEOID",
            "State/County FIPS Code",
            shp_writer,
        )
    return shp, shx, dbf