Пример #1
0
def load_soi_farm_prop(data_tree=naics.generate_tree(),
                       blue_tree=None,
                       blueprint=None,
                       from_out=False,
                       out_path=_FARM_PROP_OUT_PATH):
    """ This function loads the soi nonfarm proprietorship data:
    
    :param data_tree: The NAICS tree to read the data into.
    :param cols_dict: A dictionary mapping dataframe columns to the name of
           the column names in the input file
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. The default
           is the original NAICS tree.
    :param from_out: Whether to read in the data from output.
    :param output_path: The path of the output file.
    """
    # If from_out, load the data tree from output:
    if from_out:
        data_tree = naics.load_tree_dfs(input_path=out_path, tree=data_tree)
        return data_tree
    # Load Farm Proprietorship data:
    farm_data = pd.read_csv(_FARM_IN_PATH)
    new_farm_cols = ["Land", "FA"]
    #
    data_tree.append_all(df_nm=_FARM_DF_NM, df_cols=new_farm_cols)
    #
    land_mult = ((farm_data["R_sp"][0] + farm_data["Q_sp"][0]) *
                 (float(farm_data["A_sp"][0]) / farm_data["A_p"][0]))
    total = farm_data["R_p"][0] + farm_data["Q_p"][0]
    total_pa = 0
    cur_codes = [111, 112]
    proportions = np.zeros(len(cur_codes))
    proportions = naics.get_proportions(cur_codes, data_tree, _AST_PRT_DF_NM,
                                        [_LAND_COL_NM, _DEPR_COL_NM])
    #
    for ind_code in cur_codes:
        cur_ind = naics.find_naics(data_tree, ind_code)
        cur_df = cur_ind.data.dfs[_AST_PRT_DF_NM]
        total_pa += (cur_df[_LAND_COL_NM][0] + cur_df[_DEPR_COL_NM][0])
    #
    for i in xrange(0, len(cur_codes)):
        cur_ind = naics.find_naics(data_tree, cur_codes[i])
        cur_ind.data.dfs[_FARM_DF_NM]["Land"][0] = (
            land_mult * cur_ind.data.dfs[_AST_PRT_DF_NM][_LAND_COL_NM][0] /
            total_pa)
        cur_ind.data.dfs[_FARM_DF_NM]["FA"][0] = (
            (proportions.iloc[1, i] * total) -
            cur_ind.data.dfs[_FARM_DF_NM]["Land"][0])
    # Default:
    if blueprint == None and _TOT_CORP_DF_NM in data_tree.enum_inds[
            0].data.dfs.keys():
        blueprint = _TOT_CORP_DF_NM
    naics.pop_back(tree=data_tree, df_list=[_FARM_DF_NM])
    naics.pop_forward(tree=data_tree,
                      df_list=[_FARM_DF_NM],
                      blueprint=blueprint,
                      blue_tree=blue_tree)
    #
    return data_tree
Пример #2
0
def read_inventories(asset_tree):
    # Opening BEA's excel file on depreciable assets by industry:
    inv_book = xlrd.open_workbook(_INV_IN_PATH)
    sht0 = inv_book.sheet_by_index(0)
    num_rows = sht0.nrows
    num_cols = sht0.ncols
    #Find the starting index in worksheet.
    cur_index = naics.search_ws(sht0, 1, 25, True, [0, 0], True)
    check_index = naics.search_ws(sht0, "line", 20)
    if (cur_index[1] != check_index[1]):
        print "ERROR"
    # Reading in the crosswalk:
    inv_cross = pd.read_csv(_INV_IN_CROSS_PATH)
    # Creating a tree for the inventory data:
    data_cols = ["All", "Corp", "Non-Corp"]
    inv_tree = naics.generate_tree()
    inv_tree.append_all(df_nm="Inventories", df_cols=data_cols)
    #
    inv_data = np.zeros(inv_cross.shape[0])
    #
    cross_index = 0
    for i in xrange(cur_index[0], num_rows):
        if (cross_index >= inv_cross.shape[0]):
            break
        cur_list = str(sht0.cell_value(i, cur_index[1])).strip()
        cur_name = str(sht0.cell_value(i, cur_index[1] + 1)).strip()
        checks = ((str(cur_list) == str(inv_cross["List"][cross_index])) and
                  (str(cur_name) == str(inv_cross["Industry"][cross_index])))
        if (checks):
            cross_index += 1
            try:
                cur_value = float(sht0.cell_value(i, num_cols - 1))
            except ValueError:
                continue
            inv_data[cross_index - 1] = cur_value
            # Data is in billions:
            inv_data[cross_index -
                     1] = _INV_IN_FILE_FCTR * inv_data[cross_index - 1]
    #
    for i in xrange(0, inv_cross.shape[0]):
        cur_codes = inv_cross["NAICS"][i].strip().split(".")
        proportions = naics.get_proportions(cur_codes, asset_tree, "INV")
        for j in xrange(0, proportions.shape[1]):
            cur_ind = inv_tree.enum_inds[int(proportions.iloc[0, j])]
            prev_ind = asset_tree.enum_inds[int(proportions.iloc[0, j])]
            prev_df = prev_ind.data.dfs["INV"]
            if (sum(prev_df.iloc[0, :]) != 0):
                cur_dfs = ((prev_df / sum(prev_df.iloc[0, :])) *
                           (inv_data[i] * proportions.iloc[1, j]))
                inv_df = cur_ind.data.dfs["Inventories"]
                inv_df["All"] += sum(cur_dfs.iloc[0, :])
                for k in _CORP_NMS:
                    inv_df["Corp"] += cur_dfs[k][0]
                for k in _NCORP_NMS:
                    inv_df["Non-Corp"] += cur_dfs[k][0]
    #
    naics.pop_back(inv_tree, ["Inventories"])
    naics.pop_forward(inv_tree, ["Inventories"], "INV", asset_tree)
    return inv_tree
Пример #3
0
def read_inventories(asset_tree):
    # Opening BEA's excel file on depreciable assets by industry:
    inv_book = xlrd.open_workbook(_INV_IN_PATH)
    sht0 = inv_book.sheet_by_index(0)
    num_rows = sht0.nrows
    num_cols = sht0.ncols
    #Find the starting index in worksheet.
    cur_index = naics.search_ws(sht0, 1, 25, True, [0,0], True)
    check_index = naics.search_ws(sht0, "line", 20)
    if(cur_index[1] != check_index[1]):
        print "ERROR"
    # Reading in the crosswalk:
    inv_cross = pd.read_csv(_INV_IN_CROSS_PATH)
    # Creating a tree for the inventory data:
    data_cols = ["All", "Corp", "Non-Corp"]
    inv_tree = naics.generate_tree()
    inv_tree.append_all(df_nm="Inventories", df_cols=data_cols)
    #
    inv_data = np.zeros(inv_cross.shape[0])
    #
    cross_index = 0
    for i in xrange(cur_index[0], num_rows):
        if(cross_index >= inv_cross.shape[0]):
            break
        cur_list = str(sht0.cell_value(i, cur_index[1])).strip()
        cur_name = str(sht0.cell_value(i, cur_index[1]+1)).strip()
        checks = ((str(cur_list) == str(inv_cross["List"][cross_index])) and 
                    (str(cur_name) == str(inv_cross["Industry"][cross_index])))
        if(checks):
            cross_index += 1
            try:
                cur_value = float(sht0.cell_value(i, num_cols-1))
            except ValueError:
                continue
            inv_data[cross_index-1] = cur_value
            # Data is in billions:
            inv_data[cross_index-1] = _INV_IN_FILE_FCTR * inv_data[cross_index-1]
    #
    for i in xrange(0, inv_cross.shape[0]):
        cur_codes = inv_cross["NAICS"][i].strip().split(".")
        proportions = naics.get_proportions(cur_codes, asset_tree, "INV")
        for j in xrange(0, proportions.shape[1]):
            cur_ind = inv_tree.enum_inds[int(proportions.iloc[0,j])]
            prev_ind = asset_tree.enum_inds[int(proportions.iloc[0,j])]
            prev_df = prev_ind.data.dfs["INV"]
            if(sum(prev_df.iloc[0, :]) != 0):
                cur_dfs = ((prev_df/sum(prev_df.iloc[0,:])) *
                                (inv_data[i] * proportions.iloc[1,j]))
                inv_df = cur_ind.data.dfs["Inventories"]
                inv_df["All"] += sum(cur_dfs.iloc[0,:])
                for k in _CORP_NMS:
                    inv_df["Corp"] += cur_dfs[k][0]
                for k in _NCORP_NMS:
                    inv_df["Non-Corp"] += cur_dfs[k][0]
    #
    naics.pop_back(inv_tree, ["Inventories"])
    naics.pop_forward(inv_tree, ["Inventories"], "INV", asset_tree)
    return inv_tree
def load_soi_farm_prop(data_tree = naics.generate_tree(),
                       blue_tree = None, blueprint = None,
                       from_out=False, out_path=_FARM_PROP_OUT_PATH):
    """ This function loads the soi nonfarm proprietorship data:
    
    :param data_tree: The NAICS tree to read the data into.
    :param cols_dict: A dictionary mapping dataframe columns to the name of
           the column names in the input file
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. The default
           is the original NAICS tree.
    :param from_out: Whether to read in the data from output.
    :param output_path: The path of the output file.
    """
    # If from_out, load the data tree from output:
    if from_out:
        data_tree = naics.load_tree_dfs(input_path=out_path, tree=data_tree)
        return data_tree
    # Load Farm Proprietorship data:
    farm_data = pd.read_csv(_FARM_IN_PATH)
    new_farm_cols = ["Land", "FA"]
    #
    for i in data_tree.enum_inds:
        i.append_dfs((_FARM_DF_NM,
                      pd.DataFrame(np.zeros((1,len(new_farm_cols))), 
                                   columns=new_farm_cols)))
    #
    land_mult = ((farm_data["R_sp"][0] + farm_data["Q_sp"][0]) * 
                        (float(farm_data["A_sp"][0])/farm_data["A_p"][0]))
    total = farm_data.iloc[0,0] + farm_data.iloc[0,2]
    total_pa = 0
    cur_codes = [111,112]
    proportions = np.zeros(len(cur_codes))
    proportions = naics.get_proportions(cur_codes, data_tree, "PA_assets", 
                                 ["Land (Net)","Depreciable assets (Net)"])
    #
    for i in xrange(0, len(cur_codes)):
        cur_ind = naics.find_naics(data_tree, cur_codes[i])
        cur_df = cur_ind.data.dfs["PA_assets"]
        total_pa += (cur_df["Land (Net)"][0] + 
                                cur_df["Depreciable assets (Net)"][0])
    #
    for i in xrange(0,len(cur_codes)):
        cur_ind = naics.find_naics(data_tree, cur_codes[i])
        cur_ind.data.dfs[_FARM_DF_NM]["Land"][0] = (land_mult * 
                            cur_ind.data.dfs["PA_assets"]["Land (Net)"][0]/
                            total_pa)
        cur_ind.data.dfs[_FARM_DF_NM]["FA"][0] = ((proportions.iloc[1,i]*total)
                                    - cur_ind.data.dfs[_FARM_DF_NM]["Land"][0])
    # Default:            
    if blueprint == None and _TOT_CORP_DF_NM in data_tree.enum_inds[0].data.dfs.keys():
        blueprint = _TOT_CORP_DF_NM
    naics.pop_back(tree=data_tree, df_list=[_FARM_DF_NM])
    naics.pop_forward(tree=data_tree, df_list=[_FARM_DF_NM],
                      blueprint=blueprint, blue_tree=blue_tree)
    #
    return data_tree
Пример #5
0
def load_soi_farm_prop(data_tree = None, blue_tree = None, blueprint = None):
    #
    if data_tree == None:
        data_tree = naics.generate_tree()
    #Load Farm Proprietorship data:
    farm_data = pd.read_csv(os.path.abspath(prop_dir + "\\Farm_Data.csv"))
    new_farm_cols = ["Land", "FA"]
    #
    for i in data_tree.enum_inds:
        i.append_dfs(("farm_prop",
                      pd.DataFrame(np.zeros((1,len(new_farm_cols))), 
                                   columns=new_farm_cols)))
    #
    land_mult = ((farm_data["R_sp"][0] + farm_data["Q_sp"][0]) * 
                        (float(farm_data["A_sp"][0])/farm_data["A_p"][0]))
    total = farm_data.iloc[0,0] + farm_data.iloc[0,2]
    total_pa = 0
    cur_codes = [111,112]
    proportions = np.zeros(len(cur_codes))
    proportions = naics.get_proportions(cur_codes, data_tree, "PA_assets", 
                                 ["Land (Net)","Depreciable assets (Net)"])
    #
    for i in xrange(0, len(cur_codes)):
        cur_ind = naics.find_naics(data_tree, cur_codes[i])
        cur_df = cur_ind.data.dfs["PA_assets"]
        total_pa += (cur_df["Land (Net)"][0] + 
                                cur_df["Depreciable assets (Net)"][0])
    #
    for i in xrange(0,len(cur_codes)):
        cur_ind = naics.find_naics(data_tree, cur_codes[i])
        cur_ind.data.dfs["farm_prop"]["Land"][0] = (land_mult * 
                            cur_ind.data.dfs["PA_assets"]["Land (Net)"][0]/
                            total_pa)
        cur_ind.data.dfs["farm_prop"]["FA"][0] = ((proportions.iloc[1,i]*total)
                                    - cur_ind.data.dfs["farm_prop"]["Land"][0])
    # Default:            
    if blueprint == None and "tot_corps" in data_tree.enum_inds[0].data.dfs.keys():
        blueprint = "tot_corps"
    naics.pop_back(tree=data_tree, df_list=["farm_prop"])
    naics.pop_forward(tree=data_tree, df_list=["farm_prop"],
                      blueprint=blueprint, blue_tree=blue_tree)
    #
    return data_tree
Пример #6
0
def read_bea(output_tree, data_folder):
    # The directory with BEA data:
    bea_folder = os.path.abspath(data_folder + "\\BEA")
    # Opening BEA's excel file on depreciable assets by industry:
    bea_book = xlrd.open_workbook(os.path.abspath(
                                    bea_folder + "\\detailnonres_stk1.xlsx"))
    sht_names = bea_book.sheet_names()
    num_shts = bea_book.nsheets
    # Opening "readme" sheet:
    try:
        bea_readme = bea_book.sheet_by_name("readme")
    except xlrd.XLRDError:
        bea_readme = bea_book.sheet_by_index(0)
    # Finding relevant positions in the readme sheet:
    sht_pos = naics.search_ws(bea_readme, "Industry Title", 25, False)
    if(sht_pos == [-1,-1]):
        sht_pos = naics.search_ws(bea_readme, "bea code", 25, False, [0,0], True)
        sht_pos[1] = sht_pos[1] - 1
    if(sht_pos == [-1,-1]):
        print "Error in reading BEA fixed asset \"readme\" sheet."
        return None
    cur_row = sht_pos[0] + 1
    cur_col = sht_pos[1]
    # Finding the number of industries (includes those without bea codes):
    number_of_industries = 0
    while cur_row < bea_readme.nrows:
        if(str(bea_readme.cell_value(cur_row, cur_col)) != ""):
            number_of_industries += 1
        cur_row += 1
    # Making a list of BEA codes based on the names of the worksheets:
    bea_codes1 = np.zeros(num_shts-1, dtype=object)
    for index in xrange(1, num_shts):
        bea_codes1[index-1] = str(sht_names[index])
    # Making a list of BEA codes based on info in the readme sheet:
    code_index = 0
    cur_row = sht_pos[0] + 1
    cur_col = sht_pos[1]
    bea_codes2 = np.zeros(number_of_industries, dtype=object)
    while cur_row < bea_readme.nrows:
        if(str(bea_readme.cell_value(cur_row, cur_col)) != ""):
            cur_code = str(bea_readme.cell_value(cur_row, cur_col+1))
            cur_code = cur_code.replace("\xa0", " ").strip()
            bea_codes2[code_index] = cur_code
            code_index += 1
        cur_row += 1
    # Reading in a list of the assets in the BEA file:
    list_file = os.path.abspath(bea_folder + "\\detailnonres_list.csv")
    asset_list = pd.read_csv(list_file)
    for i in xrange(0, asset_list.shape[0]):
        asset_list.iloc[i,0] = asset_list.iloc[i,0].replace("\xa0", " ")
        asset_list.iloc[i,0] = asset_list.iloc[i,0].strip()
    
    # Reading in the corresponding naics codes:
    naics_file = os.path.abspath(bea_folder + "\\detailnonres_naics.csv")
    naics_cross = pd.read_csv(naics_file).replace("\xa0", " ")
    naics_inds = naics_cross["Industry"]
    for i in xrange(0, naics_cross.shape[0]):
        naics_inds[i] = naics_inds[i].replace("\xa0", " ").strip()
    # Creating a chart cross-referencing industry names, BEA and NAICS codes.
    chart_cols = ["Industry","BEA Code","NAICS Code"]
    bea_chart = pd.DataFrame(np.zeros(shape=(num_shts-2,3), dtype=object),
                             columns = chart_cols)
    bea_inds = bea_chart["Industry"]
    bea_naics = bea_chart["NAICS Code"]
    cur_row = sht_pos[0] + 1
    cur_col = sht_pos[1]
    num_naics = naics_cross.shape[0]
    # Filling chart with naics codes that are in both lists and the crosswalk:
    naics_counter = 0
    for i in range(0, num_shts-2):
        for cur_row in range(sht_pos[0]+1, bea_readme.nrows):
            bea_code = str(bea_readme.cell_value(cur_row,cur_col+1))
            if(str(bea_codes1[i]) == bea_code):
                bea_ind = str(bea_readme.cell_value(cur_row,cur_col))
                bea_ind = bea_ind.replace('\xa0', ' ').strip()
                bea_inds[i] = bea_ind
                bea_chart["BEA Code"][i] = bea_code
                for k in xrange(0, num_naics):
                    naics_counter = (naics_counter+1) % num_naics
                    if(naics_inds[naics_counter] == bea_chart["Industry"][i]):
                       bea_naics[i] = naics_cross["NAICS"][naics_counter]
                       break
                break
            # If they match except one has ".0" at the end:
            elif(str(bea_codes1[i]) == 
                    str(bea_readme.cell_value(cur_row, cur_col+1))[:-2]):
                bea_ind = str(bea_readme.cell_value(cur_row, cur_col))
                bea_ind = bea_ind.replace('\xa0', ' ').strip()
                bea_chart["Industry"][i] = bea_ind
                cur_code = str(bea_readme.cell_value(cur_row, cur_col+1))[:-2]
                bea_chart["BEA Code"][i] = cur_code
                for k in xrange(0, num_naics):
                    naics_counter = (naics_counter+1) % num_naics
                    if(naics_inds[naics_counter] == bea_inds[i]):
                        bea_naics[i] = naics_cross["NAICS"][naics_counter]
                        break
                break
    # Initializing the table of assets:
    #cur_sht = bea_book.sheet_by_name(bea_chart["BEA Code"][0])
    #sht_pos = naics.search_ws(cur_sht, "asset codes", 25, False)
    bea_table = pd.DataFrame(np.zeros((asset_list.shape[0],
                                       bea_chart.shape[0])), 
                             columns = bea_chart["BEA Code"])
    # For each industry, calculating 
    for i in bea_chart["BEA Code"]:
        cur_sht = bea_book.sheet_by_name(i)
        sht_pos = naics.search_ws(cur_sht, "asset codes", 25, False)
        for j in xrange(0, len(asset_list)): #xrange(sht_pos[0]+2, cur_sht.nrows):
            cur_asset = asset_list.iloc[j,0]
            for k in xrange(sht_pos[0]+2, cur_sht.nrows):
                cur_cell = str(cur_sht.cell_value(k, sht_pos[1]+1))
                cur_cell = cur_cell.replace("\xa0", " ").strip()
                if(cur_asset == cur_cell):
                    bea_table[i][j] = float(
                                        cur_sht.cell_value(k, cur_sht.ncols-1)
                                        )
        #bea_table[i] = np.array(cur_sht.col_values(cur_sht.ncols-1, sht_pos[0]+2, cur_sht.nrows))
    # The dollar amounts are in millions:
    bea_table = bea_table.convert_objects(convert_numeric=True).fillna(0)
    bea_table = bea_table * 1000000
    # Breaking down by corporate tax status:
    corp_types = ["C Corporations",
                  "Corporate general partners", 
                  "Corporate limited partners"]
    non_corp_types = ["S Corporations",
                      "Individual general partners",
                      "Individual limited partners",
                      "Partnership general partners",
                      "Partnership limited partners",
                      "Tax-exempt organization general partners",
                      "Tax-exempt organization limited partners",
                      "Nominee and other general partners", 
                      "Nominee and other limited partners",
                      "Sole Proprietors"]
    # Initialize tree for assets data:
    asset_tree = naics.load_naics(data_folder + "\\2012_NAICS_Codes.csv")
    for i in xrange(0, len(asset_tree.enum_inds)):
        asset_tree.enum_inds[i].data.append(("All", 
                pd.DataFrame(np.zeros((1, asset_list.shape[0])), 
                             columns = asset_list.iloc[:,0])))
        asset_tree.enum_inds[i].data.append(("Corp", 
                pd.DataFrame(np.zeros((1, asset_list.shape[0])),
                             columns = asset_list.iloc[:,0])))
        asset_tree.enum_inds[i].data.append(("Non-Corp", 
                pd.DataFrame(np.zeros((1, asset_list.shape[0])),
                             columns = asset_list.iloc[:,0])))
    # Fill in data from BEA's fixed asset table:
    enum_index = len(output_tree.enum_inds) - 1
    for i in xrange(0, bea_table.shape[1]):
        cur_codes = str(bea_chart["NAICS Code"][i]).split(".")
        tot_share = 0
        all_proportions = naics.get_proportions(cur_codes, output_tree, 
                                          "FA").iloc[1,:]
        corp_proportions = naics.get_proportions(cur_codes, output_tree, "FA", 
                                           corp_types).iloc[1,:]
        non_corp_proportions = naics.get_proportions(cur_codes, output_tree, 
                                               "FA", non_corp_types).iloc[1,:]
        for code_index in xrange(0, len(cur_codes)):
            for j in xrange(0, len(asset_tree.enum_inds)):
                enum_index = (enum_index+1) % len(asset_tree.enum_inds)
                out_dfs = output_tree.enum_inds[enum_index].data.dfs
                if(sum(out_dfs["FA"].iloc[0,:]) == 0):
                    continue
                all_ratio = 1.0
                corp_ratio = 0.0
                non_corp_ratio = 0.0
                for category in corp_types:
                    corp_ratio += (out_dfs["FA"][category][0]/
                                        sum(out_dfs["FA"].iloc[0,:]))
                for category in non_corp_types:
                    non_corp_ratio += (out_dfs["FA"][category][0]/
                                            sum(out_dfs["FA"].iloc[0,:]))
                cur_data = asset_tree.enum_inds[enum_index].data
                ind_codes = cur_data.dfs["Codes:"].iloc[:,0]
                share = naics.compare_codes(cur_codes, ind_codes)
                tot_share += share
                if(share == 0):
                    continue
                num_assets = asset_tree.enum_inds[0].data.dfs["All"].shape[1]
                for k in xrange(0, num_assets):
                    cur_data.dfs["All"].iloc[0,k] = (bea_table.iloc[k,i]*
                                            all_ratio*
                                            all_proportions[code_index])
                    cur_data.dfs["Corp"].iloc[0,k] = (bea_table.iloc[k,i]*
                                            corp_ratio*
                                            corp_proportions[code_index])
                    cur_data.dfs["Non-Corp"].iloc[0,k] = (bea_table.iloc[k,i]*
                                            non_corp_ratio*
                                            non_corp_proportions[code_index])
                break
            if(tot_share == 1):
                break
    return asset_tree
Пример #7
0
def read_bea(asset_tree):
    # Opening BEA's excel file on depreciable assets by industry:
    bea_book = xlrd.open_workbook(_BEA_ASSET_PATH)
    sht_names = bea_book.sheet_names()
    num_shts = bea_book.nsheets
    # Opening "readme" sheet:
    try:
        bea_readme = bea_book.sheet_by_name("readme")
    except xlrd.XLRDError:
        bea_readme = bea_book.sheet_by_index(0)
    # Finding relevant positions in the readme sheet:
    sht_pos = naics.search_ws(bea_readme, "Industry Title", 25, False)
    if(sht_pos == [-1,-1]):
        sht_pos = naics.search_ws(bea_readme, "bea code", 25, False, [0,0], True)
        sht_pos[1] = sht_pos[1] - 1
    if(sht_pos == [-1,-1]):
        print "Error in reading BEA fixed asset \"readme\" sheet."
        return None
    cur_row = sht_pos[0] + 1
    cur_col = sht_pos[1]
    # Finding the number of industries (includes those without bea codes):
    number_of_industries = 0
    while cur_row < bea_readme.nrows:
        #if(str(bea_readme.cell_value(cur_row, cur_col)) != ""):
        if(unicode(bea_readme.cell_value(cur_row, cur_col)).encode('utf8') != ""):    
       # for rownum in xrange(sh.nrows):
    #wr.writerow([unicode(c).encode('utf8') for c in sh.row_values(rownum)])    
            number_of_industries += 1
        cur_row += 1
    # Making a list of BEA codes based on the names of the worksheets:
    bea_codes1 = np.zeros(num_shts-1, dtype=object)
    for index in xrange(1, num_shts):
        bea_codes1[index-1] = str(sht_names[index])
    # Making a list of BEA codes based on info in the readme sheet:
    code_index = 0
    cur_row = sht_pos[0] + 1
    cur_col = sht_pos[1]
    bea_codes2 = np.zeros(number_of_industries, dtype=object)
    while cur_row < bea_readme.nrows:
        if(unicode(bea_readme.cell_value(cur_row, cur_col)).encode('utf8') != ""):
            cur_code = str(bea_readme.cell_value(cur_row, cur_col+1))
            cur_code = cur_code.replace("\xa0", " ").strip()
            bea_codes2[code_index] = cur_code
            code_index += 1
        cur_row += 1
    # Reading in a list of the assets in the BEA file:
    list_file = os.path.join(_BEA_DIR, "detailnonres_list.csv")
    asset_list = pd.read_csv(list_file)
    for i in xrange(0, asset_list.shape[0]):
        asset_list.iloc[i,0] = asset_list.iloc[i,0].replace("\xa0", " ")
        asset_list.iloc[i,0] = asset_list.iloc[i,0].strip()
    # Reading in the corresponding naics codes:
    naics_file = os.path.join(_BEA_DIR, "detailnonres_naics.csv")
    naics_cross = pd.read_csv(naics_file).replace("\xa0", " ")
    naics_inds = naics_cross["Industry"]
    for i in xrange(0, naics_cross.shape[0]):
        naics_inds[i] = naics_inds[i].replace("\xa0", " ").strip()
    # Creating a chart cross-referencing industry names, BEA and NAICS codes.
    chart_cols = ["Industry","BEA Code","NAICS Code"]
    bea_chart = pd.DataFrame(np.zeros(shape=(num_shts-2,3), dtype=object),
                             columns = chart_cols)
    bea_inds = bea_chart["Industry"]
    bea_naics = bea_chart["NAICS Code"]
    cur_row = sht_pos[0] + 1
    cur_col = sht_pos[1]
    num_naics = naics_cross.shape[0]
    # Filling chart with naics codes that are in both lists and the crosswalk:
    naics_counter = 0
    #for i in range(0, num_shts-2):
    i = 0
    for cur_row in range(sht_pos[0]+1, bea_readme.nrows):
        bea_code = unicode(bea_readme.cell_value(cur_row,cur_col+1)).encode('utf8')
        if(str(bea_codes1[i]) == bea_code):
            bea_ind = unicode(bea_readme.cell_value(cur_row,cur_col)).encode('utf8')
            bea_ind = bea_ind.replace('\xa0', ' ').strip()
            bea_ind = bea_ind.replace('\xc2', '').strip()
            bea_inds[i] = bea_ind
            bea_chart["BEA Code"][i] = bea_code
            for k in xrange(0, num_naics):
                naics_counter = (naics_counter+1) % num_naics
                if(naics_inds[naics_counter] == bea_chart["Industry"][i]):
                   bea_naics[i] = naics_cross["NAICS"][naics_counter]
                   i += 1
                   break
        # If they match except one has ".0" at the end:
        elif(str(bea_codes1[i]) == str(bea_readme.cell_value(cur_row, cur_col+1))[:-2]):
            bea_ind = unicode(bea_readme.cell_value(cur_row,cur_col)).encode('utf8')
            bea_ind = bea_ind.replace('\xa0', ' ').strip()
            bea_ind = bea_ind.replace('\xc2', '').strip()
            bea_chart["Industry"][i] = bea_ind
            cur_code = str(bea_readme.cell_value(cur_row, cur_col+1))[:-2]
            bea_chart["BEA Code"][i] = cur_code
            for k in xrange(0, num_naics):
                naics_counter = (naics_counter+1) % num_naics
                if(naics_inds[naics_counter] == bea_inds[i]):
                    bea_naics[i] = naics_cross["NAICS"][naics_counter]
                    i += 1
                    break
    # Initializing the table of assets:
    #cur_sht = bea_book.sheet_by_name(bea_chart["BEA Code"][0])
    #sht_pos = naics.search_ws(cur_sht, "asset codes", 25, False)
    bea_table = pd.DataFrame(np.zeros((asset_list.shape[0],
                                       bea_chart.shape[0])), 
                             columns = bea_chart["BEA Code"])
    # For each industry, calculating 
    for i in bea_chart["BEA Code"]:
        cur_sht = bea_book.sheet_by_name(i)
        sht_pos = naics.search_ws(cur_sht, "asset codes", 25, False)
        for j in xrange(0, len(asset_list)): #xrange(sht_pos[0]+2, cur_sht.nrows):
            cur_asset = asset_list.iloc[j,0]
            for k in xrange(sht_pos[0]+2, cur_sht.nrows):
                cur_cell = unicode(cur_sht.cell_value(k, sht_pos[1]+1)).encode('utf8')
                cur_cell = cur_cell.replace("\xa0", " ").strip()
                if(cur_asset == cur_cell):
                    bea_table[i][j] = float(cur_sht.cell_value(k, cur_sht.ncols-1))
        #bea_table[i] = np.array(cur_sht.col_values(cur_sht.ncols-1, sht_pos[0]+2, cur_sht.nrows))
    # The dollar amounts are in millions:
    bea_table = bea_table.convert_objects(convert_numeric=True).fillna(0)
    bea_table = bea_table * _BEA_IN_FILE_FCTR
    # Initialize tree for assets data:
    fixed_asset_tree = naics.generate_tree()
    for i in xrange(0, len(fixed_asset_tree.enum_inds)):
        fixed_asset_tree.enum_inds[i].data.append(("All", 
                pd.DataFrame(np.zeros((1, asset_list.shape[0])), 
                             columns = asset_list.iloc[:,0])))
        fixed_asset_tree.enum_inds[i].data.append(("Corp", 
                pd.DataFrame(np.zeros((1, asset_list.shape[0])),
                             columns = asset_list.iloc[:,0])))
        fixed_asset_tree.enum_inds[i].data.append(("Non-Corp", 
                pd.DataFrame(np.zeros((1, asset_list.shape[0])),
                             columns = asset_list.iloc[:,0])))
    # Fill in data from BEA's fixed asset table:
    enum_index = len(asset_tree.enum_inds) - 1
    for i in xrange(0, bea_table.shape[1]):
        cur_codes = str(bea_chart["NAICS Code"][i]).split(".")
        tot_share = 0
        all_proportions = naics.get_proportions(cur_codes, asset_tree, 
                                          "FA").iloc[1,:]
        corp_proportions = naics.get_proportions(cur_codes, asset_tree, "FA", 
                                           _CORP_NMS).iloc[1,:]
        non_corp_proportions = naics.get_proportions(cur_codes, asset_tree, 
                                               "FA", _NCORP_NMS).iloc[1,:]
        for code_index in xrange(0, len(cur_codes)):
            for j in xrange(0, len(fixed_asset_tree.enum_inds)):
                enum_index = (enum_index+1) % len(fixed_asset_tree.enum_inds)
                out_dfs = asset_tree.enum_inds[enum_index].data.dfs
                if(sum(out_dfs["FA"].iloc[0,:]) == 0):
                    continue
                all_ratio = 1.0
                corp_ratio = 0.0
                non_corp_ratio = 0.0
                for category in _CORP_NMS:
                    corp_ratio += (out_dfs["FA"][category][0]/
                                        sum(out_dfs["FA"].iloc[0,:]))
                for category in _NCORP_NMS:
                    non_corp_ratio += (out_dfs["FA"][category][0]/
                                            sum(out_dfs["FA"].iloc[0, :]))
                cur_data = fixed_asset_tree.enum_inds[enum_index].data
                ind_codes = cur_data.dfs["Codes:"].iloc[:,0]
                share = naics.compare_codes(cur_codes, ind_codes)
                tot_share += share
                if(share == 0):
                    continue
                num_assets = fixed_asset_tree.enum_inds[0].data.dfs["All"].shape[1]
                for k in xrange(0, num_assets):
                    cur_data.dfs["All"].iloc[0,k] = (bea_table.iloc[k,i]*
                                            all_ratio*
                                            all_proportions[code_index])
                    cur_data.dfs["Corp"].iloc[0,k] = (bea_table.iloc[k,i]*
                                            corp_ratio*
                                            corp_proportions[code_index])
                    cur_data.dfs["Non-Corp"].iloc[0,k] = (bea_table.iloc[k,i]*
                                            non_corp_ratio*
                                            non_corp_proportions[code_index])
                break
            if(tot_share == 1):
                break
    #
    naics.pop_back(fixed_asset_tree, ["All", "Corp", "Non-Corp"])
    naics.pop_forward(tree=fixed_asset_tree, df_list=["All"],
                      blueprint="FA", blue_tree=asset_tree)
    naics.pop_forward(tree=fixed_asset_tree, df_list=["Corp"],
                      blueprint="FA", blue_tree=asset_tree,
                      sub_print=_CORP_NMS)
    naics.pop_forward(tree=fixed_asset_tree, df_list=["Non-Corp"],
                      blueprint="FA", blue_tree=asset_tree, 
                      sub_print=_NCORP_NMS)
    return fixed_asset_tree
Пример #8
0
def read_inventories(output_tree, data_folder):
    # The directory with inventory data:
    inv_folder = os.path.abspath(data_folder + "\\Inventories")
    # Opening BEA's excel file on depreciable assets by industry:
    inv_book = xlrd.open_workbook(os.path.abspath(
                                    inv_folder + "\\Inventories.xls"))
    sht0 = inv_book.sheet_by_index(0)
    num_rows = sht0.nrows
    num_cols = sht0.ncols
    #Find the starting index in worksheet.
    cur_index = naics.search_ws(sht0, 1, 25, True, [0,0], True)
    check_index = naics.search_ws(sht0, "line", 20)
    if(cur_index[1] != check_index[1]):
        print "ERROR"
    # Breaking down by corporate tax status:
    corp_types = ["C Corporations",
                  "Corporate general partners", 
                  "Corporate limited partners"]
    non_corp_types = ["S Corporations",
                      "Individual general partners",
                      "Individual limited partners",
                      "Partnership general partners",
                      "Partnership limited partners",
                      "Tax-exempt organization general partners",
                      "Tax-exempt organization limited partners",
                      "Nominee and other general partners", 
                      "Nominee and other limited partners",
                      "Sole Proprietors"]
    # Reading in the crosswalk:
    inv_cross = pd.read_csv(os.path.abspath(
                                inv_folder + "\\Inventories_Crosswalk.csv"))
    # Creating a tree for the inventory data:
    inv_tree = naics.load_naics(data_folder + "\\2012_NAICS_Codes.csv")
    #
    data_cols = ["All", "Corp", "Non-Corp"]
    for i in inv_tree.enum_inds:
        i.data.append(("Inventories",
                       pd.DataFrame(np.zeros((1, len(data_cols))), 
                                    columns = data_cols)))
    #
    inv_data = np.zeros(inv_cross.shape[0])
    #
    cross_index = 0
    for i in xrange(cur_index[0], num_rows):
        if(cross_index >= inv_cross.shape[0]):
            break
        cur_list = str(sht0.cell_value(i, cur_index[1])).strip()
        cur_name = str(sht0.cell_value(i, cur_index[1]+1)).strip()
        checks = ((str(cur_list) == str(inv_cross["List"][cross_index])) and 
                    (str(cur_name) == str(inv_cross["Industry"][cross_index])))
        if(checks):
            cross_index += 1
            try:
                cur_value = float(sht0.cell_value(i, num_cols-1))
            except ValueError:
                continue
            inv_data[cross_index-1] = cur_value
            # Data is in billions:
            inv_data[cross_index-1] = (10**9) * inv_data[cross_index-1]
    #
    for i in xrange(0, inv_cross.shape[0]):
        cur_codes = inv_cross["NAICS"][i].strip().split(".")
        proportions = naics.get_proportions(cur_codes, output_tree, "INV")
        for j in xrange(0, proportions.shape[1]):
            cur_ind = inv_tree.enum_inds[int(proportions.iloc[0,j])]
            prev_ind = output_tree.enum_inds[int(proportions.iloc[0,j])]
            prev_df = prev_ind.data.dfs["INV"]
            if(sum(prev_df.iloc[0, :]) != 0):
                cur_dfs = ((prev_df/sum(prev_df.iloc[0,:])) *
                                (inv_data[i] * proportions.iloc[1,j]))
                inv_df = cur_ind.data.dfs["Inventories"]
                inv_df["All"] += sum(cur_dfs.iloc[0,:])
                for k in corp_types:
                    inv_df["Corp"] += cur_dfs[k][0]
                for k in non_corp_types:
                    inv_df["Non-Corp"] += cur_dfs[k][0]
    #
    return inv_tree
Пример #9
0
def read_bea(asset_tree):
    # Opening BEA's excel file on depreciable assets by industry:
    bea_book = xlrd.open_workbook(_BEA_ASSET_PATH)
    sht_names = bea_book.sheet_names()
    num_shts = bea_book.nsheets
    # Opening "readme" sheet:
    try:
        bea_readme = bea_book.sheet_by_name("readme")
    except xlrd.XLRDError:
        bea_readme = bea_book.sheet_by_index(0)
    # Finding relevant positions in the readme sheet:
    sht_pos = naics.search_ws(bea_readme, "Industry Title", 25, False)
    if (sht_pos == [-1, -1]):
        sht_pos = naics.search_ws(bea_readme, "bea code", 25, False, [0, 0],
                                  True)
        sht_pos[1] = sht_pos[1] - 1
    if (sht_pos == [-1, -1]):
        print "Error in reading BEA fixed asset \"readme\" sheet."
        return None
    cur_row = sht_pos[0] + 1
    cur_col = sht_pos[1]
    # Finding the number of industries (includes those without bea codes):
    number_of_industries = 0
    while cur_row < bea_readme.nrows:
        #if(str(bea_readme.cell_value(cur_row, cur_col)) != ""):
        if (unicode(bea_readme.cell_value(cur_row, cur_col)).encode('utf8') !=
                ""):
            # for rownum in xrange(sh.nrows):
            #wr.writerow([unicode(c).encode('utf8') for c in sh.row_values(rownum)])
            number_of_industries += 1
        cur_row += 1
    # Making a list of BEA codes based on the names of the worksheets:
    bea_codes1 = np.zeros(num_shts - 1, dtype=object)
    for index in xrange(1, num_shts):
        bea_codes1[index - 1] = str(sht_names[index])
    # Making a list of BEA codes based on info in the readme sheet:
    code_index = 0
    cur_row = sht_pos[0] + 1
    cur_col = sht_pos[1]
    bea_codes2 = np.zeros(number_of_industries, dtype=object)
    while cur_row < bea_readme.nrows:
        if (unicode(bea_readme.cell_value(cur_row, cur_col)).encode('utf8') !=
                ""):
            cur_code = str(bea_readme.cell_value(cur_row, cur_col + 1))
            cur_code = cur_code.replace("\xa0", " ").strip()
            bea_codes2[code_index] = cur_code
            code_index += 1
        cur_row += 1
    # Reading in a list of the assets in the BEA file:
    list_file = os.path.join(_BEA_DIR, "detailnonres_list.csv")
    asset_list = pd.read_csv(list_file)
    for i in xrange(0, asset_list.shape[0]):
        asset_list.iloc[i, 0] = asset_list.iloc[i, 0].replace("\xa0", " ")
        asset_list.iloc[i, 0] = asset_list.iloc[i, 0].strip()
    # Reading in the corresponding naics codes:
    naics_file = os.path.join(_BEA_DIR, "detailnonres_naics.csv")
    naics_cross = pd.read_csv(naics_file).replace("\xa0", " ")
    naics_inds = naics_cross["Industry"]
    for i in xrange(0, naics_cross.shape[0]):
        naics_inds[i] = naics_inds[i].replace("\xa0", " ").strip()
    # Creating a chart cross-referencing industry names, BEA and NAICS codes.
    chart_cols = ["Industry", "BEA Code", "NAICS Code"]
    bea_chart = pd.DataFrame(np.zeros(shape=(num_shts - 2, 3), dtype=object),
                             columns=chart_cols)
    bea_inds = bea_chart["Industry"]
    bea_naics = bea_chart["NAICS Code"]
    cur_row = sht_pos[0] + 1
    cur_col = sht_pos[1]
    num_naics = naics_cross.shape[0]
    # Filling chart with naics codes that are in both lists and the crosswalk:
    naics_counter = 0
    for i in range(0, num_shts - 2):
        for cur_row in range(sht_pos[0] + 1, bea_readme.nrows):
            bea_code = unicode(bea_readme.cell_value(cur_row, cur_col +
                                                     1)).encode('utf8')
            if (str(bea_codes1[i]) == bea_code):
                bea_ind = unicode(bea_readme.cell_value(
                    cur_row, cur_col)).encode('utf8')
                bea_ind = bea_ind.replace('\xa0', ' ').strip()
                bea_inds[i] = bea_ind
                bea_chart["BEA Code"][i] = bea_code
                for k in xrange(0, num_naics):
                    naics_counter = (naics_counter + 1) % num_naics
                    if (naics_inds[naics_counter] == bea_chart["Industry"][i]):
                        bea_naics[i] = naics_cross["NAICS"][naics_counter]
                        break
                break
            # If they match except one has ".0" at the end:
            elif (str(bea_codes1[i]) == str(
                    bea_readme.cell_value(cur_row, cur_col + 1))[:-2]):
                bea_ind = unicode(bea_readme.cell_value(
                    cur_row, cur_col)).encode('utf8')
                bea_ind = bea_ind.replace('\xa0', ' ').strip()
                bea_chart["Industry"][i] = bea_ind
                cur_code = str(bea_readme.cell_value(cur_row,
                                                     cur_col + 1))[:-2]
                bea_chart["BEA Code"][i] = cur_code
                for k in xrange(0, num_naics):
                    naics_counter = (naics_counter + 1) % num_naics
                    if (naics_inds[naics_counter] == bea_inds[i]):
                        bea_naics[i] = naics_cross["NAICS"][naics_counter]
                        break
                break
    # Initializing the table of assets:
    #cur_sht = bea_book.sheet_by_name(bea_chart["BEA Code"][0])
    #sht_pos = naics.search_ws(cur_sht, "asset codes", 25, False)
    bea_table = pd.DataFrame(np.zeros(
        (asset_list.shape[0], bea_chart.shape[0])),
                             columns=bea_chart["BEA Code"])
    # For each industry, calculating
    for i in bea_chart["BEA Code"]:
        cur_sht = bea_book.sheet_by_name(i)
        sht_pos = naics.search_ws(cur_sht, "asset codes", 25, False)
        for j in xrange(
                0, len(asset_list)):  #xrange(sht_pos[0]+2, cur_sht.nrows):
            cur_asset = asset_list.iloc[j, 0]
            for k in xrange(sht_pos[0] + 2, cur_sht.nrows):
                cur_cell = unicode(cur_sht.cell_value(k, sht_pos[1] +
                                                      1)).encode('utf8')
                cur_cell = cur_cell.replace("\xa0", " ").strip()
                if (cur_asset == cur_cell):
                    bea_table[i][j] = float(
                        cur_sht.cell_value(k, cur_sht.ncols - 1))
        #bea_table[i] = np.array(cur_sht.col_values(cur_sht.ncols-1, sht_pos[0]+2, cur_sht.nrows))
    # The dollar amounts are in millions:
    bea_table = bea_table.convert_objects(convert_numeric=True).fillna(0)
    bea_table = bea_table * _BEA_IN_FILE_FCTR
    # Initialize tree for assets data:
    fixed_asset_tree = naics.generate_tree()
    for i in xrange(0, len(fixed_asset_tree.enum_inds)):
        fixed_asset_tree.enum_inds[i].data.append(
            ("All",
             pd.DataFrame(np.zeros((1, asset_list.shape[0])),
                          columns=asset_list.iloc[:, 0])))
        fixed_asset_tree.enum_inds[i].data.append(
            ("Corp",
             pd.DataFrame(np.zeros((1, asset_list.shape[0])),
                          columns=asset_list.iloc[:, 0])))
        fixed_asset_tree.enum_inds[i].data.append(
            ("Non-Corp",
             pd.DataFrame(np.zeros((1, asset_list.shape[0])),
                          columns=asset_list.iloc[:, 0])))
    # Fill in data from BEA's fixed asset table:
    enum_index = len(asset_tree.enum_inds) - 1
    for i in xrange(0, bea_table.shape[1]):
        cur_codes = str(bea_chart["NAICS Code"][i]).split(".")
        tot_share = 0
        all_proportions = naics.get_proportions(cur_codes, asset_tree,
                                                "FA").iloc[1, :]
        corp_proportions = naics.get_proportions(cur_codes, asset_tree, "FA",
                                                 _CORP_NMS).iloc[1, :]
        non_corp_proportions = naics.get_proportions(cur_codes, asset_tree,
                                                     "FA",
                                                     _NCORP_NMS).iloc[1, :]
        for code_index in xrange(0, len(cur_codes)):
            for j in xrange(0, len(fixed_asset_tree.enum_inds)):
                enum_index = (enum_index + 1) % len(fixed_asset_tree.enum_inds)
                out_dfs = asset_tree.enum_inds[enum_index].data.dfs
                if (sum(out_dfs["FA"].iloc[0, :]) == 0):
                    continue
                all_ratio = 1.0
                corp_ratio = 0.0
                non_corp_ratio = 0.0
                for category in _CORP_NMS:
                    corp_ratio += (out_dfs["FA"][category][0] /
                                   sum(out_dfs["FA"].iloc[0, :]))
                for category in _NCORP_NMS:
                    non_corp_ratio += (out_dfs["FA"][category][0] /
                                       sum(out_dfs["FA"].iloc[0, :]))
                cur_data = fixed_asset_tree.enum_inds[enum_index].data
                ind_codes = cur_data.dfs["Codes:"].iloc[:, 0]
                share = naics.compare_codes(cur_codes, ind_codes)
                tot_share += share
                if (share == 0):
                    continue
                num_assets = fixed_asset_tree.enum_inds[0].data.dfs[
                    "All"].shape[1]
                for k in xrange(0, num_assets):
                    cur_data.dfs["All"].iloc[0,
                                             k] = (bea_table.iloc[k, i] *
                                                   all_ratio *
                                                   all_proportions[code_index])
                    cur_data.dfs["Corp"].iloc[0, k] = (
                        bea_table.iloc[k, i] * corp_ratio *
                        corp_proportions[code_index])
                    cur_data.dfs["Non-Corp"].iloc[0, k] = (
                        bea_table.iloc[k, i] * non_corp_ratio *
                        non_corp_proportions[code_index])
                break
            if (tot_share == 1):
                break
    #
    naics.pop_back(fixed_asset_tree, ["All", "Corp", "Non-Corp"])
    naics.pop_forward(tree=fixed_asset_tree,
                      df_list=["All"],
                      blueprint="FA",
                      blue_tree=asset_tree)
    naics.pop_forward(tree=fixed_asset_tree,
                      df_list=["Corp"],
                      blueprint="FA",
                      blue_tree=asset_tree,
                      sub_print=_CORP_NMS)
    naics.pop_forward(tree=fixed_asset_tree,
                      df_list=["Non-Corp"],
                      blueprint="FA",
                      blue_tree=asset_tree,
                      sub_print=_NCORP_NMS)
    return fixed_asset_tree