def noaa_parse(*, year, **_):
    """
    Read the NOAA FOSS landings csv and shape it towards the
    flowbyactivity format

    Loads "foss_landings.csv" from externaldatapath, keeps the rows of the
    requested year, strips the "-east"/"-west" suffixes from state names,
    totals "Sum Dollars" per year and state and attaches state FIPS codes.
    :param year: year of data to keep; cast to int before being compared
        with the 'Year' column
    :return: df, parsed and partially formatted to flowbyactivity
        specifications
    """
    # Read directly into a pandas df
    df_raw = pd.read_csv(externaldatapath + "foss_landings.csv")

    # read state fips from common.py; lowercase the names so that they can
    # be used as the merge key below
    df_state = get_state_FIPS().reset_index(drop=True)
    df_state['State'] = df_state["State"].str.lower()

    # modify fish state names to match those from common
    df = df_raw.drop('Sum Pounds', axis=1)
    df['State'] = df["State"].str.lower()

    # filter by year; copy so the column assignments below write to an
    # independent frame instead of a slice of the unfiltered data
    df = df[df['Year'] == int(year)].copy()

    # noaa differentiates between florida east and west,
    # which is not necessary for our purposes
    # (patterns are literal text, so regex matching is switched off
    # explicitly rather than relying on the pandas default)
    df['State'] = (
        df['State']
        .str.replace('-east', '', regex=False)
        .str.replace('-west', '', regex=False)
    )

    # sum florida data after casting rows as numeric:
    # drop the thousands separators, then convert the whole column at once
    df['Sum Dollars'] = pd.to_numeric(
        df['Sum Dollars'].str.replace(',', '', regex=False))
    # the string "sum" selects the pandas groupby sum; passing the builtin
    # `sum` is deprecated in recent pandas releases
    df2 = df.groupby(['Year', 'State'], as_index=False).agg(
        {"Sum Dollars": "sum"})

    # new column includes state fips
    df3 = df2.merge(df_state[["State", "FIPS"]], how="left",
                    left_on="State", right_on="State")

    # data includes "process at sea", which is not associated with any
    # fips: assign 99 as its fips, add the state name to the description
    # and drop the state name
    # NOTE(review): 99 is written as an int although the original comment
    # mentions '99' - confirm which type downstream code expects
    at_sea = df3['State'] == 'process at sea'
    df3['Description'] = None
    df3.loc[at_sea, 'Description'] = df3['State']
    df3.loc[at_sea, 'FIPS'] = 99
    df4 = df3.drop('State', axis=1)

    # rename columns to match flowbyactivity format
    df4 = df4.rename(columns={"Sum Dollars": "FlowAmount",
                              "FIPS": "Location"})

    # hardcode data
    df4["Class"] = "Money"
    df4["SourceName"] = "NOAA_Landings"
    df4["FlowName"] = None
    df4 = assign_fips_location_system(df4, year)
    df4["Unit"] = "$"
    df4["ActivityProducedBy"] = "All Species"
    df4['DataReliability'] = 5  # tmp
    df4['DataCollection'] = 5  # tmp

    return df4
def create_geoscale_list(df, geoscale, year='2015'):
    """
    Build the list of FIPS codes that belong to a geographic scale
    :param df: FlowBySector or FlowByActivity df
    :param geoscale: 'national', 'state', or 'county'
    :param year: year handed to the state/county FIPS lookups,
        default '2015'
    :return: list of relevant FIPS, empty when the geoscale is not
        recognized or the df does not use a FIPS Location System
    """
    # the national scale is a single code and does not depend on the df
    if geoscale == "national":
        return [US_FIPS]

    # state and county lists only apply when the Location System of the
    # df mentions FIPS
    uses_fips = df['LocationSystem'].str.contains('FIPS').any()
    if not uses_fips:
        return []

    if geoscale == "state":
        return list(get_state_FIPS(year)['FIPS'])
    if geoscale == "county":
        return list(get_county_FIPS(year)['FIPS'])

    # any other geoscale has no associated codes
    return []