Example #1
def Census_pop_URL_helper(build_url, config, args):
    urls = []

    # get date code for July 1 population numbers
    for k, v in config['datecodes'].items():
        if str(args['year']) == str(k):
            dc = str(v)

    # the url for 2010 and earlier is different
    url2000 = 'https://api.census.gov/data/2000/pep/int_population?get=POP,DATE_DESC&for=__aggLevel__:*&DATE_=12&key=__apiKey__'

    # state fips required for county level 13-14
    FIPS_2 = get_all_state_FIPS_2()['FIPS_2']
    # drop Puerto Rico
    FIPS_2 = FIPS_2[FIPS_2 != '72']

    for c in config['agg_levels']:
        # this timeframe requires state fips at the county level in the url
        if '2010' < args['year'] < '2015' and c == 'county':
            for b in FIPS_2:
                url = build_url
                url = url.replace("__aggLevel__", c)
                url = url.replace("__DateCode__", dc)
                url = url.replace("population?&", 'cty?')
                url = url.replace("DATE_CODE", 'DATE_')
                url = url.replace("&key", "&in=state:" + b + "&key")
                urls.append(url)
        else:
            if args['year'] > '2010':
                url = build_url
                url = url.replace("__aggLevel__", c)
                url = url.replace("__DateCode__", dc)
                # url date variable different pre 2018
                if args['year'] < '2018':
                    url = url.replace("DATE_CODE", 'DATE_')
                # url for 2011 - 2014 slightly modified
                if args['year'] < '2015' and c != 'county':
                    url = url.replace("population?&", 'natstprc?')
                urls.append(url)
            elif args['year'] == '2010':
                url = url2000
                url = url.replace("__aggLevel__", c)
                url = url.replace("&in=state:__stateFIPS__", '')
                if c == "us":
                    url = url.replace("*", "1")
                userAPIKey = load_api_key(config['api_name'])  # common.py fxn
                url = url.replace("__apiKey__", userAPIKey)
                urls.append(url)
    return urls
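
For orientation, here is a hedged sketch of driving this helper. The template url, config, and args values below are hypothetical stand-ins for what flowsa's FBA method yaml would supply; only the placeholder tokens and config keys are taken from the function itself.

# hypothetical template; the real build_url comes from the FBA method yaml
build_url = ('https://api.census.gov/data/2013/pep/population?&'
             'get=POP,DATE_CODE&for=__aggLevel__:*'
             '&DATE_CODE=__DateCode__&key=__apiKey__')
config = {'datecodes': {2013: 6},  # assumed year -> date code mapping
          'agg_levels': ['us', 'state', 'county'],
          'api_name': 'Census'}
args = {'year': '2013'}
urls = Census_pop_URL_helper(build_url, config, args)
# yields one url each for 'us' and 'state', plus one per state FIPS
# for the county level (2011-2014 county queries need &in=state:XX)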
Example #2
def Census_CBP_URL_helper(build_url, config, args):
    urls_census = []
    FIPS_2 = get_all_state_FIPS_2()['FIPS_2']
    for c in FIPS_2:
        url = build_url
        url = url.replace("__stateFIPS__", c)
        # specified NAICS code year depends on year of data
        if args["year"] in ['2017']:
            url = url.replace("__NAICS__", "NAICS2017")
        if args["year"] in ['2012', '2013', '2014', '2015', '2016']:
            url = url.replace("__NAICS__", "NAICS2012")
        if args["year"] in ['2010', '2011']:
            url = url.replace("__NAICS__", "NAICS2007")
        urls_census.append(url)
    return urls_census
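
Every example on this page relies on get_all_state_FIPS_2() from flowsa's common module. Judging from how the examples index its result, it returns a dataframe with two string columns, 'State' and 'FIPS_2'. A minimal stand-in for experimenting outside flowsa might look like this (three illustrative rows, not the full crosswalk):

import pandas as pd

def get_all_state_FIPS_2():
    # stand-in: the real flowsa helper loads the full state crosswalk;
    # only the two columns used by these examples are mocked here
    return pd.DataFrame({
        'State': ['ALABAMA', 'ALASKA', 'WYOMING'],
        'FIPS_2': ['01', '02', '56'],
    })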
Example #3
def BLS_QCEW_URL_helper(build_url, config, args):
    urls = []
    FIPS_2 = get_all_state_FIPS_2()['FIPS_2']

    # the url for 2013 and earlier differs from the base url (and points to a zip file)
    if args["year"] < '2014':
        url = build_url
        url = url.replace('api', 'files')
        url = url.replace('a/area/__areaFIPS__.csv', 'csv/' + args["year"] + '_annual_by_area.zip')
        urls.append(url)
    else:
        for c in FIPS_2:
            url = build_url
            url = url.replace('__areaFIPS__', c + '000')
            urls.append(url)
    return urls
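
A hedged sketch of the rewriting this helper performs. The base url is a plausible QCEW open-data path inferred from the replace() calls above, not the actual template from the method yaml:

build_url = 'https://data.bls.gov/cew/data/api/2013/a/area/__areaFIPS__.csv'
urls = BLS_QCEW_URL_helper(build_url, {}, {'year': '2013'})
# pre-2014 years collapse to a single statewide zip:
# ['https://data.bls.gov/cew/data/files/2013/csv/2013_annual_by_area.zip']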
Example #4
def mlu_parse(*, df_list, source, year, **_):
    """
    Combine, parse, and format the provided dataframes
    :param df_list: list of dataframes to concat and format
    :param source: string, source name
    :param year: string, year of data to subset and format
    :return: df, parsed and partially formatted to flowbyactivity
        specifications
    """
    # concat dataframes
    df = pd.concat(df_list,
                   sort=False).drop(columns=['SortOrder', 'Region']).rename(
                       columns={'Region or State': 'State'})

    # use "melt" fxn to convert colummns into rows
    df = df.melt(id_vars=["State", "Year"],
                 var_name="FlowName",
                 value_name="FlowAmount")

    # load fips codes and merge
    fips = get_all_state_FIPS_2()
    fips['State'] = fips['State'].apply(lambda x: x.title())
    fips['FIPS_2'] = fips['FIPS_2'] + '000'
    dfm = df.merge(fips, how='left').rename(columns={'FIPS_2': 'Location'})
    dfm['Location'] = np.where(dfm['State'] == "U.S. total", US_FIPS,
                               dfm['Location'])

    # drop null values
    dfm2 = dfm[~dfm['Location'].isnull()].reset_index(drop=True)
    dfm3 = assign_fips_location_system(dfm2, year)
    # sub by year
    dfm3['Year'] = dfm3['Year'].astype(str)
    dfm3 = dfm3[dfm3['Year'] == year].reset_index(drop=True)

    dfm3["Class"] = "Land"
    dfm3["SourceName"] = source
    dfm3["ActivityProducedBy"] = None
    dfm3["ActivityConsumedBy"] = dfm3['FlowName']
    dfm3['FlowType'] = 'ELEMENTARY_FLOW'
    dfm3["Compartment"] = 'ground'
    dfm3["Unit"] = "Thousand Acres"
    dfm3['DataReliability'] = 5  # tmp
    dfm3['DataCollection'] = 5  # tmp

    return dfm3
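
A hedged usage sketch with a toy input frame. The column names mirror the ones this parser drops, renames, and melts; the land-use column is hypothetical, and running it assumes a flowsa environment (get_all_state_FIPS_2, assign_fips_location_system, US_FIPS):

toy = pd.DataFrame({
    'SortOrder': [1],
    'Region': ['n/a'],
    'Region or State': ['Alabama'],
    'Year': [2012],
    'Cropland': [2500],  # hypothetical flow column, thousand acres
})
fba = mlu_parse(df_list=[toy], source='USDA_ERS_MLU', year='2012')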
Example #5
def blm_pls_parse(**kwargs):
    """
    Combine, parse, and format the provided dataframes
    :param kwargs: potential arguments include:
                   dataframe_list: list of dataframes to concat and format
                   args: dictionary, used to run flowbyactivity.py ('year' and 'source')
    :return: df, parsed and partially formatted to flowbyactivity specifications
    """
    # load arguments necessary for function
    dataframe_list = kwargs['dataframe_list']
    args = kwargs['args']

    Location = []
    fips = get_all_state_FIPS_2()
    df = pd.concat(dataframe_list, sort=False)
    df = df.drop(df[df.FlowAmount == ""].index)
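    # build Location row-by-row, aligned with df (assumes every non-Total
    # LocationStr matches exactly one state name in the fips table)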
    for index, row in df.iterrows():
        if row['LocationStr'] == "Total":
            Location.append("00000")
        else:
            for i, fips_row in fips.iterrows():
                if fips_row["State"] == row['LocationStr']:
                    Location.append(fips_row["FIPS_2"] + "000")
    df = df.drop(columns=["LocationStr"])

    # standardize activity names
    df = standardize_blm_pls_activity_names(df)

    # replace withdrawn code
    df.loc[df['FlowAmount'] == "Q", 'FlowAmount'] = WITHDRAWN_KEYWORD
    df.loc[df['FlowAmount'] == "N", 'FlowAmount'] = WITHDRAWN_KEYWORD
    df['FlowName'] = df['ActivityConsumedBy'].copy()
    df['Location'] = Location
    df["Class"] = 'Land'
    df['Compartment'] = "ground"
    df["LocationSystem"] = "FIPS_2010"
    df["SourceName"] = 'BLM_PLS'
    df['Year'] = args["year"]
    df['Unit'] = "Acres"
    df['FlowType'] = 'ELEMENTARY_FLOW'
    df['DataReliability'] = 5  # tmp
    df['DataCollection'] = 5  # tmp

    return df
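
The Q/N replacement above swaps placeholder codes in the source tables for flowsa's WITHDRAWN_KEYWORD constant. A self-contained illustration with a stand-in keyword:

import pandas as pd

WITHDRAWN_KEYWORD = 'W'  # stand-in; flowsa defines the real constant
df = pd.DataFrame({'FlowAmount': ['120', 'Q', 'N']})
df.loc[df['FlowAmount'].isin(['Q', 'N']), 'FlowAmount'] = WITHDRAWN_KEYWORD
# FlowAmount is now ['120', 'W', 'W']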
Example #6
def mlu_parse(**kwargs):
    """
    Combine, parse, and format the provided dataframes
    :param kwargs: potential arguments include:
                   dataframe_list: list of dataframes to concat and format
                   args: dictionary, used to run flowbyactivity.py ('year' and 'source')
    :return: df, parsed and partially formatted to flowbyactivity specifications
    """
    # load arguments necessary for function
    dataframe_list = kwargs['dataframe_list']
    args = kwargs['args']

    # accumulate row dicts, then build the output dataframe once at the end
    rows = []
    # concat dataframes
    df = pd.concat(dataframe_list, sort=False)
    data = {}
    df_columns = df.columns.tolist()
    location = ""

    fips = get_all_state_FIPS_2()
    for index, row in df.iterrows():
        if int(row["Year"]) == int(args['year']):
            if (row["Region or State"] != "Northeast") & \
                    (row["Region or State"] != "Lake States") & \
                    (row["Region or State"] != "Corn Belt") & \
                    (row["Region or State"] != "Northern Plains") & \
                    (row["Region or State"] != "Appalachian") & \
                    (row["Region or State"] != "Southeast") & \
                    (row["Region or State"] != "Delta States") & \
                    (row["Region or State"] != "Southern Plains") & \
                    (row["Region or State"] != "Mountain") & \
                    (row["Region or State"] != "Pacific") & \
                    (row["Region or State"] != "48 States"):
                if row['Region or State'] == "U.S. total":
                    location = "00000"
                else:
                    for i, fips_row in fips.iterrows():
                        if fips_row["State"] == row['Region or State']:
                            location = fips_row["FIPS_2"] + "000"

                for col in df_columns:
                    if (col != "SortOrder") & (col != "Region") & \
                            (col != "Region or State") & (col != "Year"):
                        data["Class"] = "Land"
                        data["SourceName"] = "USDA_ERS_MLU"
                        # flownames are the same as ActivityConsumedBy for
                        # purposes of mapping elementary flows
                        data['FlowName'] = col
                        data["FlowAmount"] = int(row[col])
                        data["ActivityProducedBy"] = None
                        data["ActivityConsumedBy"] = col
                        data['FlowType'] = 'ELEMENTARY_FLOW'
                        data["Compartment"] = 'ground'
                        data["Location"] = location
                        data["Year"] = int(args['year'])
                        data["Unit"] = "Thousand Acres"
                        data['DataReliability'] = 5  # tmp
                        data['DataCollection'] = 5  # tmp
                        # store a copy of the row; DataFrame.append was
                        # removed in pandas 2.0
                        rows.append(data.copy())
    output = pd.DataFrame(rows)
    output = assign_fips_location_system(output, args['year'])

    return output
Example #7
def Census_pop_URL_helper(**kwargs):
    """
    This helper function uses the "build_url" input from flowbyactivity.py, which
    is a base url for data imports that requires parts of the url text string
    to be replaced with info specific to the data year.
    This function does not parse the data, only modifies the urls from which data is obtained.
    :param kwargs: potential arguments include:
                   build_url: string, base url
                   config: dictionary, items in FBA method yaml
                   args: dictionary, arguments specified when running
                         flowbyactivity.py ('year' and 'source')
    :return: list, urls to call, concat, parse, format into Flow-By-Activity format
    """

    # load the arguments necessary for function
    build_url = kwargs['build_url']
    config = kwargs['config']
    args = kwargs['args']

    urls = []

    # get date code for July 1 population numbers
    for k, v in config['datecodes'].items():
        if str(args['year']) == str(k):
            dc = str(v)

    # the url for 2010 and earlier is different
    url2000 = 'https://api.census.gov/data/2000/pep/int_population?get=' \
              'POP,DATE_DESC&for=__aggLevel__:*&DATE_=12&key=__apiKey__'

    # state fips required for county level 13-14
    FIPS_2 = get_all_state_FIPS_2()['FIPS_2']
    # drop Puerto Rico
    FIPS_2 = FIPS_2[FIPS_2 != '72']

    for c in config['agg_levels']:
        # this timeframe requires state fips at the county level in the url
        if '2010' < args['year'] < '2015' and c == 'county':
            for b in FIPS_2:
                url = build_url
                url = url.replace("__aggLevel__", c)
                url = url.replace("__DateCode__", dc)
                url = url.replace("population?&", 'cty?')
                url = url.replace("DATE_CODE", 'DATE_')
                url = url.replace("&key", "&in=state:" + b + "&key")
                urls.append(url)
        else:
            if args['year'] > '2010':
                url = build_url
                url = url.replace("__aggLevel__", c)
                url = url.replace("__DateCode__", dc)
                # url date variable different pre 2018
                if args['year'] < '2018':
                    url = url.replace("DATE_CODE", 'DATE_')
                # url for 2011 - 2014 slightly modified
                if args['year'] < '2015' and c != 'county':
                    url = url.replace("population?&", 'natstprc?')
                urls.append(url)
            elif args['year'] == '2010':
                url = url2000
                url = url.replace("__aggLevel__", c)
                url = url.replace("&in=state:__stateFIPS__", '')
                if c == "us":
                    url = url.replace("*", "1")
                userAPIKey = load_api_key(config['api_name'])  # (common.py fxn)
                url = url.replace("__apiKey__", userAPIKey)
                urls.append(url)
    return urls
Example #8
def fiws_parse(**kwargs):
    """
    Combine, parse, and format the provided dataframes
    :param kwargs: potential arguments include:
                   dataframe_list: list of dataframes to concat and format
                   args: dictionary, used to run flowbyactivity.py ('year' and 'source')
    :return: df, parsed and partially formatted to flowbyactivity specifications
    """
    # load arguments necessary for function
    dataframe_list = kwargs['dataframe_list']
    args = kwargs['args']

    # concat dataframes
    df = pd.concat(dataframe_list, sort=False)
    # select data for chosen year, cast year as string to match argument
    df['Year'] = df['Year'].astype(str)
    df = df[df['Year'] == args['year']].reset_index(drop=True)
    # add state fips codes, reading in datasets from common.py
    fips = get_all_state_FIPS_2().reset_index(drop=True)
    # ensure capitalization of state names
    fips['State'] = fips['State'].apply(lambda x: x.title())
    fips['StateAbbrev'] = fips['State'].map(us_state_abbrev)
    # append three zeros to create 5-digit state location codes
    fips['FIPS_2'] = fips['FIPS_2'].apply(lambda x: x.ljust(3 + len(x), '0'))
    df = pd.merge(df, fips, how='left', left_on='State', right_on='StateAbbrev')
    # set us location code
    df.loc[df['State_x'] == 'US', 'FIPS_2'] = US_FIPS
    # drop "All" in variabledescription2
    df.loc[df['VariableDescriptionPart2'] == 'All', 'VariableDescriptionPart2'] = 'drop'
    # combine variable descriptions to create Activity name and remove ", drop"
    df['ActivityProducedBy'] = df['VariableDescriptionPart1'] + \
                               ', ' + df['VariableDescriptionPart2']
    df['ActivityProducedBy'] = df['ActivityProducedBy'].str.replace(", drop", "", regex=True)
    # trim whitespace
    df['ActivityProducedBy'] = df['ActivityProducedBy'].str.strip()
    # drop columns
    df = df.drop(columns=['artificialKey', 'PublicationDate', 'Source', 'ChainType_GDP_Deflator',
                          'VariableDescriptionPart1', 'VariableDescriptionPart2',
                          'State_x', 'State_y', 'StateAbbrev', 'unit_desc'])
    # rename columns
    df = df.rename(columns={"VariableDescriptionTotal": "Description",
                            "Amount": "FlowAmount",
                            "FIPS_2": "Location"})
    # assign flowname, based on comma placement
    df['FlowName'] = df['Description'].str.split(',').str[0]
    # add location system based on year of data
    df['Year'] = df['Year'].astype(int)
    df.loc[df['Year'] >= 2019, 'LocationSystem'] = 'FIPS_2019'
    df.loc[df['Year'].between(2015, 2018), 'LocationSystem'] = 'FIPS_2015'
    df.loc[df['Year'].between(2013, 2014), 'LocationSystem'] = 'FIPS_2013'
    df.loc[df['Year'].between(2010, 2012), 'LocationSystem'] = 'FIPS_2010'
    # drop unnecessary rows
    df = df[df['FlowName'].str.contains("Cash receipts")]
    # the unit is $1000 USD, so multiply FlowAmount by 1000 and set unit as 'USD'
    df['FlowAmount'] = df['FlowAmount'].astype(float)
    df['FlowAmount'] = df['FlowAmount'] * 1000
    # hard code data
    df['Class'] = 'Money'
    df['SourceName'] = 'USDA_ERS_FIWS'
    df['Unit'] = 'USD'
    # Add DQ scores
    df['DataReliability'] = 5  # tmp
    df['DataCollection'] = 5  # tmp
    # sort df
    df = df.sort_values(['Location', 'FlowName'])
    # reset index
    df.reset_index(drop=True, inplace=True)

    return df
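
The padding step in this parser is easy to misread: ljust(3 + len(x), '0') right-pads each 2-digit state FIPS with exactly three zeros, producing the 5-digit location codes used above.

>>> '01'.ljust(3 + len('01'), '0')
'01000'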
Example #9
def Census_CBP_URL_helper(*, build_url, year, **_):
    """
    This helper function uses the "build_url" input from flowbyactivity.py,
    which is a base url for data imports that requires parts of the url text
    string to be replaced with info specific to the data year. This function
    does not parse the data, only modifies the urls from which data
    is obtained.
    :param build_url: string, base url
    :param year: year
    :return: list, urls to call, concat, parse, format into
        Flow-By-Activity format
    """
    urls_census = []
    # For 2010 and 2011, get the census data by county instead of by
    # state, because the state-level query that requests all counties
    # at once returns too many results and errors out.
    if year in ['2010', '2011']:
        # county FIPS from the 2010 vintage apply to both years
        county_fips_df = get_county_FIPS('2010')
        county_fips = county_fips_df.FIPS
        for d in county_fips:
            url = build_url
            # FIPS is a 5-character string: 2-digit state + 3-digit county
            state_digit = d[:2]
            county_digit = d[2:5]
            url = url.replace("__NAICS__", "NAICS2007")
            url = url.replace("__stateFIPS__", state_digit)
            url = url.replace("__countyFIPS__", county_digit)

            if year == "2010":
                # These are the counties where data is not available.
                # s signifies state code and y indicates year.
                s_02_y_10 = ["105", "195", "198", "230", "275"]
                s_15_y_10 = ["005"]
                s_48_y_10 = ["269"]

                # There are specific counties in various states for the year
                # 2010 that do not have data. For these counties a URL is not
                # generated as if there is no data then an error occurs.
                if state_digit == "02" and county_digit in s_02_y_10 or \
                        state_digit == "15" and county_digit in s_15_y_10 or \
                        state_digit == "48" and county_digit in s_48_y_10:
                    pass
                else:
                    urls_census.append(url)
            else:
                # These are the counties where data is not available.
                # s signifies state code and y indicates year.
                s_02_y_11 = ["105", "195", "198", "230", "275"]
                s_15_y_11 = ["005"]
                s_48_y_11 = ["269", "301"]

                # There are specific counties in various states for the year
                # 2011 that do not have data. For these counties a URL is
                # not generated as if there is no data then an error occurs.
                if state_digit == "02" and county_digit in s_02_y_11 or \
                        state_digit == "15" and county_digit in s_15_y_11 or \
                        state_digit == "48" and county_digit in s_48_y_11:
                    pass
                else:
                    urls_census.append(url)
    else:
        FIPS_2 = get_all_state_FIPS_2()['FIPS_2']
        for c in FIPS_2:
            url = build_url
            url = url.replace("__stateFIPS__", c)
            # specified NAICS code year depends on year of data
            if year in ['2017']:
                url = url.replace("__NAICS__", "NAICS2017")
                url = url.replace("__countyFIPS__", "*")
            if year in ['2012', '2013', '2014', '2015', '2016']:
                url = url.replace("__NAICS__", "NAICS2012")
                url = url.replace("__countyFIPS__", "*")
            urls_census.append(url)

    return urls_census
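
Finally, a hedged sketch of calling this keyword-only variant. The base url is a plausible CBP query built around the placeholders the function replaces, not the real template from the method yaml:

build_url = ('https://api.census.gov/data/2016/cbp?get=ESTAB,PAYANN'
             '&for=county:__countyFIPS__&in=state:__stateFIPS__'
             '&__NAICS__=*')
urls = Census_CBP_URL_helper(build_url=build_url, year='2016')
# 2016 maps to NAICS2012; one url per state with county wildcarded to '*'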