Example #1
def assign_naics(df):

    cw_load = load_bea_crosswalk()
    cw = cw_load[['BEA_2012_Detail_Code', 'NAICS_2012_Code']].drop_duplicates().reset_index(drop=True)
    # keep only rows where the NAICS code is exactly 6 digits
    cw = cw[cw['NAICS_2012_Code'].apply(lambda x: len(str(x)) == 6)].reset_index(drop=True)

    df = pd.merge(df, cw, left_on='Activity', right_on='BEA_2012_Detail_Code')
    df = df.drop(columns=["BEA_2012_Detail_Code"])
    df = df.rename(columns={"NAICS_2012_Code": "Sector"})
    df['SectorSourceName'] = 'NAICS_2012_Code'

    return df


if __name__ == '__main__':
    # select years to pull unique activity names
    years = ['2002']
    # df of unique ers activity names
    df = unique_activity_names('BEA_Make_Table', years)
    # add manual naics 2012 assignments
    df = assign_naics(df)
    # drop any rows where naics12 is 'nan' (because level of detail not needed or to prevent double counting)
    df.dropna(subset=["Sector"], inplace=True)
    # assign sector type
    df['SectorType'] = None
    # sort df
    df = order_crosswalk(df)
    # save as csv
    df.to_csv(datapath + "activitytosectormapping/" + "Crosswalk_BEA_Make_Table_toNAICS.csv", index=False)
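The assign_naics() step above is a many-to-many merge: each BEA_2012_Detail_Code can map to several 6-digit NAICS codes, so an activity is repeated once per matching sector. A minimal self-contained sketch of that step, using toy crosswalk rows in place of load_bea_crosswalk() (the values below are illustrative, not verified BEA/NAICS assignments):

import pandas as pd

# toy stand-ins for the real crosswalk and activity list
cw = pd.DataFrame({'BEA_2012_Detail_Code': ['1111A0', '1111A0', '112300'],
                   'NAICS_2012_Code': ['111110', '111120', '112310']})
df = pd.DataFrame({'Activity': ['1111A0', '112300']})

# keep only 6-digit NAICS codes, then merge and rename, mirroring assign_naics()
cw = cw[cw['NAICS_2012_Code'].str.len() == 6].drop_duplicates()
df = (pd.merge(df, cw, left_on='Activity', right_on='BEA_2012_Detail_Code')
        .drop(columns=['BEA_2012_Detail_Code'])
        .rename(columns={'NAICS_2012_Code': 'Sector'}))
df['SectorSourceName'] = 'NAICS_2012_Code'
print(df)  # one output row per matching (Activity, Sector) pair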
Example #2
    # keep only rows where the NAICS code is exactly 6 digits
    cw = cw[cw['NAICS_2012_Code'].apply(lambda x: len(str(x)) == 6)].reset_index(drop=True)

    df = pd.merge(df, cw, left_on='Activity', right_on='BEA_2012_Detail_Code')
    df = df.drop(columns=["BEA_2012_Detail_Code"])
    df = df.rename(columns={"NAICS_2012_Code": "Sector"})
    df['SectorSourceName'] = 'NAICS_2012_Code'

    return df


if __name__ == '__main__':
    # select years to pull unique activity names
    years = ['2002']
    # flowclass
    flowclass = ['Money']
    # datasource
    datasource = 'BEA_Make_Table'
    # df of unique ers activity names
    df = unique_activity_names(flowclass, years, datasource)
    # add manual naics 2012 assignments
    df = assign_naics(df)
    # drop any rows where naics12 is 'nan' (because level of detail not needed or to prevent double counting)
    df.dropna(subset=["Sector"], inplace=True)
    # assign sector type
    df['SectorType'] = None
    # sort df
    df = order_crosswalk(df)
    # save as csv
    df.to_csv(datapath + "activitytosectormapping/" + "Crosswalk_" + datasource + "_toNAICS.csv", index=False)
Example #3
# write_Crosswalk_Census_CBP.py (scripts)
# !/usr/bin/env python3
# coding=utf-8
# [email protected]
"""
Create a crosswalk for Census CBP to NAICS 2012. Downloaded data is already provided in NAICS
"""
import pandas as pd
from flowsa.common import datapath
from scripts.common_scripts import unique_activity_names, order_crosswalk

if __name__ == '__main__':
    # select years to pull unique activity names
    years = ['2012']
    # df of unique ers activity names
    df = unique_activity_names('Census_CBP', years)
    # Activity and Sector are the same
    df['Sector'] = df['Activity'].copy()
    # modify the sector for activity = '31-33'
    df.loc[df['Activity'] == '31-33', 'Sector'] = '31'
    df = df.append(pd.DataFrame(
        [['Census_CBP', '31-33', '32']],
        columns=['ActivitySourceName', 'Activity', 'Sector']),
                   sort=True)
    df = df.append(pd.DataFrame(
        [['Census_CBP', '31-33', '33']],
        columns=['ActivitySourceName', 'Activity', 'Sector']),
                   sort=True)
    # Add additional columns
    df['SectorSourceName'] = "NAICS_2012_Code"
    df['SectorType'] = "I"
Example #4
    cw = cw.sort_values(['BEA_2012_Detail_Code', 'NAICS_2012_Code'])

    df = pd.merge(df, cw, left_on='Activity', right_on='BEA_2012_Detail_Code')
    df = df.drop(columns=["BEA_2012_Detail_Code"])
    df = df.rename(columns={"NAICS_2012_Code": "Sector"})
    df['SectorSourceName'] = 'NAICS_2012_Code'

    return df


if __name__ == '__main__':
    # select years to pull unique activity names
    year = '2002'
    # datasource
    datasource = 'Blackhurst_IO'
    # df of unique ers activity names
    df = unique_activity_names(datasource, year)
    # add manual naics 2012 assignments
    df = assign_naics(df)
    # drop any rows where naics12 is 'nan'
    # (because level of detail not needed or to prevent double counting)
    df.dropna(subset=["Sector"], inplace=True)
    # assign sector type
    df['SectorType'] = None
    # sort df
    df = order_crosswalk(df)
    # save as csv
    df.to_csv(datapath + "activitytosectormapping/" + "Crosswalk_" +
              datasource + "_toNAICS.csv",
              index=False)
Example #5
    # df.loc[df['Activity'] ==
    #        'Summary: Pre-Reform Act Simultaneous Leases, Public Domain and Acquired Lands',
    #        'Sector'] = ''

    return df


if __name__ == '__main__':
    # select years to pull unique activity names
    years = ['2007', '2011', '2012']
    # assign datasource
    datasource = 'BLM_PLS'
    # df of unique ers activity names
    df_list = []
    for y in years:
        dfy = unique_activity_names(datasource, y)
        df_list.append(dfy)
    df = pd.concat(df_list, ignore_index=True).drop_duplicates()
    # add manual naics 2012 assignments
    df = assign_naics(df)
    # assign sector source name
    df['SectorSourceName'] = 'NAICS_2012_Code'
    # drop any rows where naics12 is 'nan'
    # (because level of detail not needed or to prevent double counting)
    df.dropna(subset=["Sector"], inplace=True)
    # assign sector type
    df['SectorType'] = "I"
    # sort df
    df = order_crosswalk(df)
    # save as csv
    df.to_csv(datapath + "activitytosectormapping/" + "Crosswalk_" +
              datasource + "_toNAICS.csv",
              index=False)
Example #6
    # all other animal production: 11299
    df.loc[df['Activity'] ==
           'Animals and products, Other animals and products',
           'Sector'] = '112990E'
    # df.loc[df['Activity'] == 'Animals and products, All other animals and products', 'Sector'] = '112990E1'
    # df.loc[df['Activity'] == 'Animals and products, Milk pelts', 'Sector'] = '112990E1'
    df.loc[df['Activity'] == 'Wool', 'Sector'] = '112990F'

    ################# FORESTRY ##############
    df.loc[df['Activity'] == 'Forest products', 'Sector'] = '11531'

    return df


if __name__ == '__main__':
    # select years to pull unique activity names
    years = ['2012', '2017']
    # df of unique ers activity names
    df = unique_activity_names('USDA_ERS_FIWS', years)
    # add manual naics 2012 assignments
    df = assign_naics(df)
    # drop any rows where naics12 is 'nan' (because level of detail not needed or to prevent double counting)
    df.dropna(subset=["Sector"], inplace=True)
    # assign sector type
    df['SectorType'] = None
    # sort df
    df = order_crosswalk(df)
    # save as csv
    df.to_csv(datapath + "activitytosectormapping/" +
              "Crosswalk_USDA_ERS_FIWS_toNAICS.csv",
              index=False)
Example #7
# write_Crosswalk_USDA_CoA_Cropland_NAICS.py (scripts)
# !/usr/bin/env python3
# coding=utf-8
"""
Create a crosswalk for CoA Cropland Naics to NAICS 2012. Downloaded data is already provided in NAICS
"""
import pandas as pd
from flowsa.common import datapath
from scripts.common_scripts import unique_activity_names, order_crosswalk

if __name__ == '__main__':
    # select years to pull unique activity names
    years = ['2012', '2017']
    # df of unique ers activity names
    df = unique_activity_names('USDA_CoA_Cropland_NAICS', years)
    # drop activities with symbol '&'
    df = df[~df['Activity'].str.contains('&')]
    # Activity and Sector are the same
    df['Sector'] = df['Activity'].copy()
    # modify the sector for activity ranges
    #df.loc[df['Activity'] == '11193 & 11194 & 11199', 'Sector'] = '11193'
    df = df.append(pd.DataFrame(
        [['USDA_CoA_Cropland_NAICS', '11193 & 11194 & 11199', '11193']],
        columns=['ActivitySourceName', 'Activity', 'Sector']),
                   sort=True)
    df = df.append(pd.DataFrame(
        [['USDA_CoA_Cropland_NAICS', '11193 & 11194 & 11199', '11194']],
        columns=['ActivitySourceName', 'Activity', 'Sector']),
                   sort=True)
    df = df.append(pd.DataFrame(
        [['USDA_CoA_Cropland_NAICS', '11193 & 11194 & 11199', '11199']],
        columns=['ActivitySourceName', 'Activity', 'Sector']),
                   sort=True)
Example #8
    df.loc[df['Activity'] == 'MINK, LIVE', 'Sector'] = '112930A'
    df.loc[df['Activity'] == 'RABBITS, LIVE', 'Sector'] = '112930B'

    # all other animal production: 11299
    df.loc[df['Activity'] == 'ALPACAS', 'Sector'] = '112990A'
    df.loc[df['Activity'] == 'BISON', 'Sector'] = '112990B'
    df.loc[df['Activity'] == 'DEER', 'Sector'] = '112990C'
    df.loc[df['Activity'] == 'ELK', 'Sector'] = '112990D'
    df.loc[df['Activity'] == 'LLAMAS', 'Sector'] = '112990E'

    return df


if __name__ == '__main__':
    # select years to pull unique activity names
    years = ['2012', '2017']
    # df of unique ers activity names
    df = unique_activity_names('USDA_CoA_Livestock', years)
    # add manual naics 2012 assignments
    df = assign_naics(df)
    # drop any rows where naics12 is 'nan' (because level of detail not needed or to prevent double counting)
    df.dropna(subset=["Sector"], inplace=True)
    # assign sector type
    df['SectorType'] = None
    # sort df
    df = order_crosswalk(df)
    # save as csv
    df.to_csv(datapath + "activitytosectormapping/" +
              "Crosswalk_USDA_CoA_Livestock_toNAICS.csv",
              index=False)
Example #9
    cw = cw[cw['NAICS_2012_Code'].apply(
        lambda x: len(str(x)) == 6)].reset_index(drop=True)

    cw = cw.sort_values(['BEA_2012_Detail_Code', 'NAICS_2012_Code'])

    df = pd.merge(df, cw, left_on='Activity', right_on='BEA_2012_Detail_Code')
    df = df.drop(columns=["BEA_2012_Detail_Code"])
    df = df.rename(columns={"NAICS_2012_Code": "Sector"})
    df['SectorSourceName'] = 'NAICS_2012_Code'

    return df


if __name__ == '__main__':
    # select years to pull unique activity names
    years = ['2002']
    # df of unique ers activity names
    df = unique_activity_names('Blackhurst_IO', years)
    # add manual naics 2012 assignments
    df = assign_naics(df)
    # drop any rows where naics12 is 'nan' (because level of detail not needed or to prevent double counting)
    df.dropna(subset=["Sector"], inplace=True)
    # assign sector type
    df['SectorType'] = None
    # sort df
    df = order_crosswalk(df)
    # save as csv
    df.to_csv(datapath + "activitytosectormapping/" +
              "Crosswalk_Blackhurst_IO_toNAICS.csv",
              index=False)
Example #10
    act_list = unmapped_df['Activity'].values.tolist()

    # if in the activity list, the sector should be modified so last digit is 0

    df['Sector'] = np.where(df['Activity'].isin(act_list),
                            df['Activity'].apply(lambda x: x[0:5]) + '0',
                            df['Sector'])

    return df


if __name__ == '__main__':
    # select years to pull unique activity names
    years = ['2002', '2010', '2011', '2012', '2015']
    # df of unique ers activity names
    df = unique_activity_names('BLS_QCEW', years)
    # Activity and Sector are the same
    df['Sector'] = df['Activity'].copy()
    # modify the sector for activity = '31-33'
    df.loc[df['Activity'] == '31-33', 'Sector'] = '31'
    df = df.append(pd.DataFrame(
        [['BLS_QCEW', '31-33', '32']],
        columns=['ActivitySourceName', 'Activity', 'Sector']),
                   sort=True)
    df = df.append(pd.DataFrame(
        [['BLS_QCEW', '31-33', '33']],
        columns=['ActivitySourceName', 'Activity', 'Sector']),
                   sort=True)
    df = link_non_bls_naics_to_naics(df)
    # Add additional columns
    df['SectorSourceName'] = "NAICS_2012_Code"
Example #11
    # df.loc[df['Activity'] == 'Total', 'Sector'] = ''
    # df.loc[df['Activity'] == 'Total Groundwater', 'Sector'] = ''
    # df.loc[df['Activity'] == 'Total Surface', 'Sector'] = ''

    # assign sector source name
    df['SectorSourceName'] = 'NAICS_2012_Code'

    return df


if __name__ == '__main__':
    # select years to pull unique activity names
    years = ['2010', '2015']
    # df of unique ers activity names
    df = unique_activity_names('USGS_NWIS_WU', years)
    # add manual naics 2012 assignments
    df = assign_naics(df)
    # drop any rows where naics12 is 'nan' (because level of detail not needed or to prevent double counting)
    df.dropna(subset=["Sector"], inplace=True)
    # assign sector type
    df['SectorType'] = 'I'
    # assign activity source name
    df['ActivitySourceName'] = 'USGS_NWIS_WU'
    # sort df
    df = order_crosswalk(df)
    # save as csv
    df.to_csv(datapath + "activitytosectormapping/" +
              "Crosswalk_USGS_NWIS_WU_toNAICS.csv",
              index=False)
"""
Create a crosswalk linking Statistics Canada to NAICS

"""
import pandas as pd
from flowsa.common import datapath
from scripts.common_scripts import unique_activity_names, order_crosswalk



if __name__ == '__main__':
    # select years to pull unique activity names
    years = ['2011', '2015']
    # df of unique ers activity names
    df = unique_activity_names('StatCan_GDP', years)
    # add manual naics 2012 assignments
    # Activity and Sector are the same
    df['Sector'] = df['Activity'].copy()
    # modify the sector for activity = '31-33'
    df.loc[df['Activity'] == '31-33', 'Sector'] = '31'
    df = df.append(pd.DataFrame([['StatCan_GDP', '31-33', '32']],
                                columns=['ActivitySourceName', 'Activity', 'Sector']), sort=True)
    df = df.append(pd.DataFrame([['StatCan_GDP', '31-33', '33']],
                                columns=['ActivitySourceName', 'Activity', 'Sector']), sort=True)
    # drop 'Other' and nan
    df = df[~df['Activity'].isin(['Other', 'nan'])]
    # Add additional columns
    df['SectorSourceName'] = "NAICS_2012_Code"
    df['SectorType'] = "I"
    # reorder
Example #13
    df.loc[df['Activity'] == 'SWITCHGRASS', 'Sector'] = '111998L'
    df.loc[df['Activity'] == 'FIELD CROPS, OTHER', 'Sector'] = '111998M'

    return df


if __name__ == '__main__':
    # select years to pull unique activity names
    years = ['2012', '2015', '2017', '2018', '2020']
    # datasource
    datasources = ['USDA_ACUP_Pesticide', 'USDA_ACUP_Fertilizer']
    # loop through datasources:
    for d in datasources:
        # df of unique activity names
        df_list = []
        for y in years:
            dfy = unique_activity_names(d, y)
            df_list.append(dfy)
        df = pd.concat(df_list, ignore_index=True).drop_duplicates()
        # add manual naics 2012 assignments
        df = assign_naics(df)
        # drop any rows where naics12 is 'nan'
        # (because level of detail not needed or to prevent double counting)
        df = df.dropna()
        # assign sector type
        df['SectorType'] = None
        # sort df
        df = order_crosswalk(df)
        # save as csv
        df.to_csv(f"{datapath}activitytosectormapping/NAICS_Crosswalk_{d}.csv", index=False)
Example #14
# !/usr/bin/env python3
# coding=utf-8
# [email protected]
"""
Create a crosswalk linking Statistics Canada to NAICS

"""
import pandas as pd
from flowsa.common import datapath, fbaoutputpath
from scripts.common_scripts import unique_activity_names, order_crosswalk

if __name__ == '__main__':
    # select years to pull unique activity names
    years = ['2011', '2015']
    # df of unique ers activity names
    df = unique_activity_names('StatCan_IWS_MI', years)
    # add manual naics 2012 assignments
    # Activity and Sector are the same
    df['Sector'] = df['Activity'].copy()
    # modify the sector for activity = '31-33'
    df.loc[df['Activity'] == '31-33', 'Sector'] = '31'
    df = df.append(pd.DataFrame(
        [['StatCan_IWS_MI', '31-33', '32']],
        columns=['ActivitySourceName', 'Activity', 'Sector']),
                   sort=True)
    df = df.append(pd.DataFrame(
        [['StatCan_IWS_MI', '31-33', '33']],
        columns=['ActivitySourceName', 'Activity', 'Sector']),
                   sort=True)
    # drop 'Other' and nan
    df = df[~df['Activity'].isin(['Other', 'nan'])]
Example #15
    # all other animal production: 11299, manually add row
    df = df.append(pd.DataFrame(
        [['USGS_WU_Coef', 'Sheep and lambs', 'NAICS_2012_Code', '11299']],
        columns=[
            'ActivitySourceName', 'Activity', 'SectorSourceName', 'Sector'
        ]),
                   ignore_index=True,
                   sort=True)

    return df


if __name__ == '__main__':
    # select unique activity names from file
    years = ['2005']
    df = unique_activity_names('USGS_WU_Coef', years)
    # add manual naics 2012 assignments
    df = assign_naics(df)
    # drop any rows where naics12 is 'nan' (because level of detail not needed or to prevent double counting)
    df.dropna(subset=["Sector"], inplace=True)
    # assign sector type
    df['SectorType'] = None
    # sort df
    df = order_crosswalk(df)
    # save as csv
    df.to_csv(datapath + "activitytosectormapping/" +
              "Crosswalk_USGS_WU_Coef_toNAICS.csv",
              index=False)
Example #16
    df.loc[df['Activity'] == 'VEGETABLE TOTALS', 'Sector'] = '1112'

    df.loc[df['Activity'] == 'WHEAT', 'Sector'] = '11114'

    # df.loc[df['Activity'] == 'LETTUCE', 'Sector'] = ''
    # df.loc[df['Activity'] == 'POTATOES', 'Sector'] = ''
    # df.loc[df['Activity'] == 'SWEET CORN', 'Sector'] = ''
    # df.loc[df['Activity'] == 'TOMATOES', 'Sector'] = ''

    return df


if __name__ == '__main__':
    # select years to pull unique activity names
    years = ['2013', '2018']
    # df of unique ers activity names
    df = unique_activity_names('USDA_IWMS', years)
    # add manual naics 2012 assignments
    df = assign_naics(df)
    # drop any rows where naics12 is 'nan' (because level of detail not needed or to prevent double counting)
    df.dropna(subset=["Sector"], inplace=True)
    # assign sector type
    df['SectorType'] = None
    # sort df
    df = order_crosswalk(df)
    # save as csv
    df.to_csv(datapath + "activitytosectormapping/" +
              "Crosswalk_USDA_IWMS_toNAICS.csv",
              index=False)