def form_stock2_second_indus(panel_path, save_path):
    """Convert per-period stock weight files into a second-level industry weight table.

    Every file in ``panel_path`` is read as a GBK-encoded CSV that must contain
    a ``wind_code`` column and an ``i_weight`` column. Each stock is tagged with
    its second-level industry, the weights are rescaled so they sum to 100
    within the file, and then summed per industry. The result has one row per
    input file (labelled with the file name up to its first ``.``) and one
    column per industry; industries missing from a file get 0.

    :param panel_path: directory holding the monthly stock data files
    :param save_path: directory the resulting CSV is written into
    :return: None; the table is written to ``save_path`` as '二级行业权重.csv'
    """
    data = Data()
    indus_infor = data.secondindustryname
    # NOTE(review): ``Data.reindex`` is a project helper; presumably it aligns
    # the industry table to the project's standard stock/date axes -- confirm.
    indus_infor = data.reindex(indus_infor)

    # Seed with an empty frame so an empty directory still produces an empty
    # table (pd.concat rejects an empty list), exactly as before.
    rows = [pd.DataFrame()]

    # os.listdir returns entries in arbitrary order; sort so that the row order
    # of the output file is reproducible.
    for f in sorted(os.listdir(panel_path)):
        period = f.split('.')[0]

        stock_wei = pd.read_csv(os.path.join(panel_path, f),
                                encoding='gbk', engine='python')
        stock_wei = stock_wei.set_index('wind_code')

        # Use the industry classification of the matching period when it is
        # available, otherwise fall back to the last column of the table.
        if period in indus_infor.columns:
            stock_wei['second_indus'] = indus_infor[period]
        else:
            stock_wei['second_indus'] = indus_infor[indus_infor.columns[-1]]

        # Drops every row with a missing value in ANY column of the file, not
        # only in the weight / industry columns.
        stock_wei = stock_wei.dropna(axis=0, how='any')

        # Rescale the remaining weights so they sum to 100 for this file.
        stock_wei['i_weight'] = (100 * stock_wei['i_weight']
                                 / np.sum(stock_wei['i_weight']))

        grouped = stock_wei[['i_weight', 'second_indus']].groupby('second_indus')
        indus_wei = grouped.sum().T
        indus_wei.index = [period]
        rows.append(indus_wei)

    # Concatenate once instead of re-copying the accumulated frame per file.
    indux_wei_total = pd.concat(rows, axis=0)
    indux_wei_total = indux_wei_total.fillna(0)
    indux_wei_total.to_csv(os.path.join(save_path, '二级行业权重.csv'),
                           encoding='gbk')
def get_indus_wt_in_index(index, indus_level='second'):
    """Aggregate the stock weights of an index into industry weights.

    :param index: index identifier, forwarded unchanged to
        ``get_stock_wt_in_index``
    :param indus_level: ``'first'`` or ``'second'``; selects
        ``Data().firstindustryname`` or ``Data().secondindustryname`` as the
        stock-to-industry mapping
    :return: DataFrame with one row per industry and one column per column of
        the stock weight table; each cell is the summed weight of the stocks
        mapped to that industry in that column, 0 where there is none
    :raises ValueError: if ``indus_level`` is neither ``'first'`` nor
        ``'second'``
    """
    # Validate before doing any work: an unknown level used to leave
    # ``industry`` unbound and fail later with an opaque UnboundLocalError.
    if indus_level not in ('first', 'second'):
        raise ValueError(
            "indus_level must be 'first' or 'second', got {!r}".format(indus_level))

    index_wt = get_stock_wt_in_index(index)

    data = Data()
    if indus_level == 'first':
        industry = data.firstindustryname
    else:
        industry = data.secondindustryname
    # NOTE(review): ``Data.reindex`` is a project helper; presumably it aligns
    # the industry table to the project's standard axes -- confirm.
    industry = data.reindex(industry)
    # Restrict the industry table to exactly the labels of the weight table.
    industry = industry.loc[index_wt.index, index_wt.columns]

    # Seed with an empty frame so a weight table without columns still yields
    # an empty DataFrame (pd.concat rejects an empty list), exactly as before.
    frames = [pd.DataFrame()]
    for d in index_wt.columns:
        tmp_df = pd.concat([index_wt.loc[:, d], industry.loc[:, d]], axis=1)
        # Keep only rows that have both a weight and an industry label.
        tmp_df = tmp_df.dropna()
        tmp_df.columns = ['wt', 'industry']
        indus_wt_tmp = tmp_df['wt'].groupby(tmp_df['industry']).sum()
        frames.append(pd.DataFrame(indus_wt_tmp.values,
                                   index=indus_wt_tmp.index, columns=[d]))

    # Concatenate once instead of re-copying the accumulated frame per column.
    indus_wt = pd.concat(frames, axis=1)
    indus_wt = indus_wt.fillna(0)
    return indus_wt