def update_future_price():
    """Incrementally download NH futures index daily closes via tushare and save.

    Restarts 10 days before the last stored date (or from 2010-01-01 on a
    fresh run), drops overlapping dates from the stored frame, then appends
    the freshly downloaded rows.
    """
    data = Data()
    try:
        nh_future_price_daily = data.nh_future_price_daily.T
        st_date = nh_future_price_daily.index[-1] - timedelta(10)
    except Exception:
        # No local history yet: start from scratch.
        nh_future_price_daily = pd.DataFrame()
        st_date = datetime(2010, 1, 1)

    ed_date = datetime.today().strftime("%Y%m%d")

    add_df = pd.DataFrame()
    for code in NH_index_dict.keys():
        daily = pro.index_daily(ts_code=code,
                                start_date=st_date.strftime('%Y%m%d'),
                                end_date=ed_date)
        daily = daily.set_index('trade_date')
        daily.index = pd.to_datetime(daily.index)
        daily.sort_index(inplace=True)
        add_df = pd.concat([add_df, pd.DataFrame({code: daily['close']})],
                           axis=1)

    # Replace any locally stored dates that were re-downloaded.
    dup_dates = [d for d in add_df.index if d in nh_future_price_daily.index]
    nh_future_price_daily.drop(dup_dates, axis=0, inplace=True)
    nh_future_price_daily = pd.concat([nh_future_price_daily, add_df], axis=0)

    save_p = r'D:\commodity_datebase\price_data'
    data.save(nh_future_price_daily, 'nh_future_price_daily.csv',
              save_path=save_p)
def compute_future_beta():
    """Compute 22-day rolling betas of stock-index futures vs their underlying index.

    Saved to <data_dair>/index/stock_future/sf_beta.
    """
    data = Data()
    sf_close_daily = data.sf_close_daily
    index_price_daily = data.index_price_daily.T

    # Restrict both frames to their common dates to avoid alignment problems.
    common = sorted(set(sf_close_daily.columns) & set(index_price_daily.index))
    sf_close_daily = sf_close_daily[common]
    index_price_daily = index_price_daily.loc[common, :]

    sf_beta = pd.DataFrame()
    for code, closes in sf_close_daily.iterrows():
        # Pick the benchmark index from the contract prefix.
        if 'IC' in code:
            bench = index_price_daily['ZZ500']
        elif 'IF' in code:
            bench = index_price_daily['HS300']
        elif 'IH' in code:
            bench = index_price_daily['SZ50']
        else:
            print('Code Bug')
            raise ValueError

        # Drop NaN contract prices and align the benchmark to them.
        valid = closes.dropna()
        bench = bench[valid.index]
        if len(valid) > 22:
            bet = rolling_regress_1(bench, valid, window=22)
            sf_beta = pd.concat([sf_beta, pd.DataFrame({code: bet}).T], axis=0)

    p = os.path.join(data_dair, 'index', 'stock_future')
    data.save(sf_beta, 'sf_beta', p)
def update_f_data_from_wind(special_year=2015):
    """Download/extend Wind (wsd) financial series for all stocks and save them.

    Parameters
    ----------
    special_year : int or falsy
        When truthy, only that calendar year's month-ends are (re)downloaded;
        when falsy, each series is extended from its last stored date, and
        series that are already recent enough are skipped.
    """
    path = os.path.join(data_dair, 'download_from_juyuan')
    w.start()
    data = Data()
    stock_basic_inform = data.stock_basic_inform
    mes = generate_months_ends()

    iterms = [
        # StructWind('rd_exp', 'Q', 'unit=1;rptType=1;Days=Alldays'),
        StructWind('west_netprofit_YOY', 'M')
    ]

    codes_str = ','.join(stock_basic_inform.index)
    eds = datetime.today().strftime("%Y-%m-%d")

    for it in iterms:
        name = it.name
        period = it.period
        other = it.other
        try:
            tmp_df = eval('data.' + name.lower())
            tds_dt = tmp_df.columns[-1]
        except Exception:
            tmp_df = pd.DataFrame()
            tds_dt = datetime(2009, 1, 1)
        tds = tds_dt.strftime("%Y-%m-%d")

        # e.g. "unit=1;rptType=1;Period=Q;Days=Alldays"
        if other:
            oth = 'Period=' + period + ';' + other
        else:
            oth = 'Period=' + period

        if special_year:
            mes0 = [m for m in mes if m.year == special_year]
            tds_dt = mes0[0]
            tds = tds_dt.strftime("%Y-%m-%d")
            eds = mes0[-1].strftime("%Y-%m-%d")

        # BUG FIX: the original computed `datetime.today() - tds` where tds
        # was a *string* (result of strftime), which raises TypeError; keep
        # the datetime value alongside the string and compare with it.
        if not special_year and period == 'Q' and (datetime.today() - tds_dt).days < 110:
            continue
        elif not special_year and period == 'M' and (datetime.today() - tds_dt).days < 20:
            continue

        res_tmp = w.wsd(codes_str, name, tds, eds, oth, usedf=True)
        res_tmp1 = res_tmp[1].T
        tmp_df = pd.concat([tmp_df, res_tmp1], axis=1)

        # Local files and Wind may use different date formats; normalise the
        # columns before sorting them chronologically.
        tmp_df.columns = pd.to_datetime(tmp_df.columns)
        tmp_df = tmp_df[sorted(tmp_df.columns)]

        data.save(tmp_df, name.lower(), save_path=path)
def update_daily_basic():
    """Pull tushare daily basic indicators (pb/pe/turnover/market caps) forward.

    Restarts from the earliest "last stored column" across the five tables
    and updates trading day by trading day up to yesterday.
    """
    data = Data()
    pb_df = data.pb_daily
    pe_df = data.pe_daily
    turnover_df = data.turnoverrate_daily
    negotiablemv_df = data.negotiablemv_daily  # negotiable market value (10k CNY)
    totalmv_df = data.totalmv_daily            # total market value (10k CNY)

    st = np.min([pb_df.columns[-1], pe_df.columns[-1], turnover_df.columns[-1],
                 negotiablemv_df.columns[-1], totalmv_df.columns[-1]])
    ed = datetime.today() - timedelta(1)

    tds = trade_days()
    days_to_update = [d for d in tds if st <= d <= ed]
    if not days_to_update:
        print('日度指标数据:已经更新到最新数据,无需更新,自动退出')
        return None

    for d in days_to_update:
        d_str = d.strftime("%Y%m%d")
        snap = pro.daily_basic(
            ts_code='', trade_date=d_str,
            fields='ts_code,turnover_rate,pe,pb,total_mv,circ_mv')
        snap = snap.set_index('ts_code').sort_index()

        pb_df = insert_to_df(pb_df, pd.DataFrame({d: snap['pb']}))
        pe_df = insert_to_df(pe_df, pd.DataFrame({d: snap['pe']}))
        turnover_df = insert_to_df(turnover_df,
                                   pd.DataFrame({d: snap['turnover_rate']}))
        totalmv_df = insert_to_df(totalmv_df,
                                  pd.DataFrame({d: snap['total_mv']}))
        negotiablemv_df = insert_to_df(negotiablemv_df,
                                       pd.DataFrame({d: snap['circ_mv']}))
        sleep(0.33)  # stay under the tushare request rate limit

    history_file = os.path.join(date_dair, 'download_from_juyuan')
    data.save(pb_df, 'pb_daily.csv', save_path=history_file)
    data.save(pe_df, 'pe_daily.csv', save_path=history_file)
    data.save(negotiablemv_df, 'NegotiableMV_daily.csv', save_path=history_file)
    data.save(totalmv_df, 'TotalMV_daily.csv', save_path=history_file)
    data.save(turnover_df, 'TurnoverRate_daily.csv', save_path=history_file)
def compute_changepect_open_daily():
    """Compute day-over-day percentage change of adjusted open prices and save it."""
    data = Data()
    adj_open = data.openprice_daily * data.adjfactor
    adj_open.dropna(axis=1, how='all', inplace=True)

    # Percentage change vs the previous trading day, in percent units.
    pct = (adj_open / adj_open.shift(1, axis=1) - 1) * 100
    pct.dropna(how='all', axis=1, inplace=True)

    data.save(pct, 'changepect_open_daily'.upper())
def update_futmap():
    """Incrementally download the dominant-contract mapping for IF/IH/IC.

    Refactor: the original triplicated the identical download/cleanup pipeline
    for the three codes; it is now a single loop with identical behavior.
    """
    data = Data()
    try:
        fut_map = data.fut_map.T
        start_d = fut_map.index[-1].strftime("%Y%m%d")
    except Exception:
        fut_map = pd.DataFrame()
        start_d = datetime(2009, 1, 1).strftime("%Y%m%d")
    end_d = datetime.today().strftime("%Y%m%d")

    # Same pipeline for each of the three stock-index futures.
    frames = []
    for col, ts_code in [('IF', 'IF.CFX'), ('IH', 'IH.CFX'), ('IC', 'IC.CFX')]:
        dom = pro.fut_mapping(ts_code=ts_code, start_date=start_d,
                              end_date=end_d)
        dom.set_index('trade_date', inplace=True)
        dom.drop('ts_code', axis=1, inplace=True)
        dom.columns = [col]
        dom.index = pd.to_datetime(dom.index)
        dom.sort_index(inplace=True)
        frames.append(dom)

    domain_fut = pd.concat(frames, axis=1)
    # Convert tushare-style codes (xxxx.CFX) into Wind-style codes (xxxx.CFE).
    domain_fut = domain_fut.applymap(
        lambda x: x.split('.CFX')[0] + '.CFE' if isinstance(x, str) else x)

    # Drop dates already stored locally before appending.
    dupl = [i for i in domain_fut.index if i in fut_map.index]
    domain_fut.drop(dupl, axis=0, inplace=True)
    fut_map = pd.concat([fut_map, domain_fut], axis=0)

    p = os.path.join(date_dair, 'index')
    data.save(fut_map, 'fut_map', save_path=p)
    print('期货主力合约映射表下载完毕')
def Rps(self):
    """Compute the 120-day RPS (relative price strength) for all stocks.

    Prices within 200 calendar days of a stock's IPO are masked to NaN, then
    the 120-day price ratio is scaled cross-sectionally to [1, 100].

    Returns the scaled RPS frame truncated to START_YEAR onward.
    """
    data = Data()
    all_codes = data.stock_basic_inform
    ipo_dates = pd.to_datetime(all_codes['ipo_date'.upper()])

    close_price = data.closeprice_daily * data.adjfactor
    close_price.dropna(axis=1, how='all', inplace=True)

    # Mask newly listed stocks.  BUG FIX: the original mutated the Series
    # yielded by iterrows(), which is a copy and never writes back into the
    # DataFrame — assign through .loc so the mask actually takes effect.
    for code in close_price.index:
        if code not in ipo_dates.index:
            close_price.loc[code, :] = np.nan
            continue
        cutoff = ipo_dates[code] + timedelta(200)
        close_price.loc[code, close_price.columns < cutoff] = np.nan

    ext_120 = close_price / close_price.shift(periods=120, axis=1)
    ext_120.dropna(how='all', axis=1, inplace=True)
    rps_120 = ext_120.apply(scaler, scaler_max=100, scaler_min=1)

    rps = rps_120
    rps.dropna(how='all', axis=1, inplace=True)
    res = rps.apply(scaler, scaler_max=100, scaler_min=1)
    res = CALFUNC.del_dat_early_than(res, START_YEAR)
    return res
def month_return_compare_to_market_index(stock_list, his_month):
    """Compare the given stocks' monthly returns with market-index returns.

    Returns
    -------
    (res1, res2) : res1 stacks the index returns plus the portfolio mean for
        his_month; res2 holds each stock's monthly pct change for that month.
    """
    data = Data()
    changePCT = data.changepct_monthly

    # Locate the column matching the requested year/month.
    ff = None
    for col in changePCT.columns:
        if (col.year, col.month) == (his_month.year, his_month.month):
            ff = col
            break
    res2 = changePCT.loc[stock_list, ff]

    index_path = r'D:\pythoncode\IndexEnhancement\指数相关'
    index_price = pd.read_csv(os.path.join(index_path,
                                           'index_price_monthly.csv'),
                              engine='python')
    index_price = index_price.set_index(index_price.columns[0])
    index_price.index = pd.to_datetime(index_price.index)
    index_r = (index_price - index_price.shift(1)) / index_price.shift(1)

    # Locate the same month in the index-return table.
    fff = None
    for idx in index_r.index:
        if (idx.year, idx.month) == (his_month.year, his_month.month):
            fff = idx
            break

    res1 = pd.DataFrame({fff: index_r.loc[fff, :].drop_duplicates()})
    portfolio = pd.DataFrame(data=res2.mean(), index=['组合'], columns=[ff])
    res1 = pd.concat([res1, portfolio], axis=0)

    return res1, res2
def __init__(self, factor_path, save_path, sentinel=1000, update_only=False):
    """Store the factor/save paths and the sentinel value.

    update_only is currently unused (the date-list initialisation that
    depended on it is commented out below).
    """
    self.data = Data()
    self.sentinel = sentinel
    self.save_path = save_path
    self.factor_path = factor_path
    # if not update_only:
    #     self.dates_d = sorted(self.adjfactor.columns)
    #     self.dates_m = sorted(self.pct_chg_M.columns)
def del_market(stock_pool, to_del_mkt):
    """Set pool membership to False for stocks listed on market `to_del_mkt`.

    BUG FIX: the original mutated the column Series yielded by iteritems(),
    which is not guaranteed to write back into `stock_pool`; the assignment
    now goes through .loc, which is equivalent to flagging those rows False
    in every column at once.
    """
    data = Data()
    mkt = data.stock_basic_inform[['MKT']]

    # Stocks on the excluded market that also appear in the pool.
    drop_codes = [i for i in mkt[mkt[mkt.columns[0]] == to_del_mkt].index
                  if i in stock_pool.index]
    stock_pool.loc[drop_codes, :] = False
    return stock_pool
def eps_over_80(stock_pool):
    """Keep only pool stocks whose latest RPS value exceeds 80.

    The latest RPS column is joined (inner) onto the pool and remains in the
    returned frame, matching the original behavior.
    """
    data = Data()
    rps = data.rps
    latest_col = rps.columns[-1]

    merged = pd.concat([stock_pool, rps[[latest_col]]], axis=1, join='inner')
    keep = merged.index[merged[latest_col] > 80]
    return merged.loc[keep, :]
def name_to_code(names_list):
    """Map stock names to their codes using the basic-information table."""
    data = Data()
    basic = data.stock_basic_inform[['sec_name'.upper()]]
    basic['code'.upper()] = basic.index
    basic = basic.set_index('sec_name'.upper())
    return list(basic.loc[names_list, 'code'.upper()])
def Rps(self):
    """Compute 120-day RPS, either from scratch ('all') or incrementally ('update').

    In update mode only columns newer than the stored RPS are recomputed,
    with a 121-column lookback so the first new 120-day ratio is defined.
    """
    data = Data()
    all_codes = data.stock_basic_inform
    ipo_dates = pd.to_datetime(all_codes['ipo_date'.upper()])

    close_price = data.closeprice_daily * data.adjfactor
    close_price.dropna(axis=1, how='all', inplace=True)

    # Mask prices within 200 calendar days of the IPO.  BUG FIX: mutating
    # the row Series returned by iterrows() does not write back into the
    # frame — assign through .loc so the mask takes effect.
    for code in close_price.index:
        if code not in ipo_dates.index:
            close_price.loc[code, :] = np.nan
            continue
        cutoff = ipo_dates[code] + timedelta(200)
        close_price.loc[code, close_price.columns < cutoff] = np.nan

    if self._status == 'all':
        ext_120 = close_price / close_price.shift(periods=120, axis=1)
        ext_120.dropna(how='all', axis=1, inplace=True)
        rps_120 = ext_120.apply(scaler, scaler_max=100, scaler_min=1)
        rps = rps_120
        rps.dropna(how='all', axis=1, inplace=True)
        res = rps.apply(scaler, scaler_max=100, scaler_min=1)
        res = CALFUNC.del_dat_early_than(res, START_YEAR)
    elif self._status == 'update':
        hased_rps = data.RPS
        to_update = [col for col in close_price.columns
                     if col not in hased_rps.columns
                     and col > hased_rps.columns[-1]]
        if len(to_update) == 0:
            print('RPS无需要更新的部分')
            return hased_rps

        # Include 121 prior columns so the first new column's ratio exists.
        st_loc = np.where(close_price.columns == to_update[0])[0][0] - 121
        close_price_new = close_price.iloc[:, st_loc:]

        ext_120 = close_price_new / close_price_new.shift(periods=120, axis=1)
        ext_120.dropna(how='all', axis=1, inplace=True)
        rps_120 = ext_120.apply(scaler, scaler_max=100, scaler_min=1)
        rps_120.dropna(how='all', axis=1, inplace=True)
        res0 = rps_120.apply(scaler, scaler_max=100, scaler_min=1)

        hased_rps[res0.columns] = res0
        res = hased_rps

    return res
def main(p_dict, fp, is_ind_neu, is_size_neu, is_plate_neu, special_plate=None,
         selection=None):
    """
    Pre-process one factor cross-section file (e.g. 2009-01-23.csv).

    Pipeline: fill missing values -> winsorize -> (optional) industry/size
    neutralization -> standardize.  Financial factors in the input are
    assumed to be already date-aligned.  Factors that need no processing
    (e.g. Rps) should be listed in constant.info_cols; the step functions
    import that list and skip them.

    is_ind_neu : whether to industry-neutralize — needed for stock
        multi-factor work, not for industry multi-factor work.
    """
    file_path = p_dict['file_path']
    save_path = p_dict['save_path']

    # Read the raw cross-section.  BUG FIX: the original printed 'debug' and
    # swallowed the exception, then crashed with NameError on the undefined
    # `data`; report the failing file and re-raise instead.
    try:
        data = pd.read_csv(os.path.join(file_path, fp), engine='python',
                           encoding='gbk')
    except Exception:
        print('failed to read factor file: ' + os.path.join(file_path, fp))
        raise

    if 'No' in data.columns:
        data = data.set_index('No')

    # Restrict to one plate (SW level-1 industry) when requested.
    if special_plate:
        data_ = Data()
        stock_basic = data_.stock_basic_inform
        sw_1 = stock_basic[['申万一级行业']]
        stock_list = list(sw_1.index[sw_1[sw_1.columns[0]] == special_plate])
        codes = [i for i in data.index if data.loc[i, 'Code'] in stock_list]
        data = data.loc[codes, :]
        data.index = range(0, len(data))

    # Backtest mode: drop unlisted stocks and stocks not trading next month;
    # tracking mode: no-op.
    data_to_process = drop_some(data)

    data_to_process = fill_na(data_to_process)        # missing values
    if len(data_to_process) == 0:
        print('debug')
    data_to_process = winsorize(data_to_process)      # clip outliers
    if is_ind_neu or is_size_neu:
        data_to_process = neutralize(data_to_process, ind_neu=is_ind_neu,
                                     size_neu=is_size_neu)  # neutralize
    data_to_process = standardize(data_to_process)    # z-score

    data_final = data_to_process
    if data_final.index.name != 'No':
        data_final.index = range(1, len(data_final) + 1)
        data_final.index.name = 'No'
    data_final.to_csv(os.path.join(save_path, fp), encoding='gbk')
def get_latest_updated_date():
    """Return the earliest 'last stored column' date across the core daily tables."""
    data = Data()
    last_dates = [data.closeprice_daily.columns[-1],
                  data.openprice_daily.columns[-1],
                  data.adjfactor.columns[-1],
                  data.negotiablemv_daily.columns[-1],
                  data.pe_daily.columns[-1]]
    return np.min(last_dates)
def del_industry(stock_pool, to_del_indus):
    """Set pool membership to False for stocks in any of the given SW level-1 industries.

    BUG FIX: the original mutated the column Series yielded by iteritems(),
    which is not guaranteed to write back into `stock_pool`; assigning via
    .loc flags those rows False across every column, which is what the
    per-column loop intended.
    """
    data = Data()
    sw_1 = data.stock_basic_inform[['申万一级行业']]

    for ind in to_del_indus:
        drop_codes = [i for i in sw_1[sw_1[sw_1.columns[0]] == ind].index
                      if i in stock_pool.index]
        stock_pool.loc[drop_codes, :] = False
    return stock_pool
def get_net_buy_rate():
    """Compute main net buy amount scaled by negotiable market value; save as CSV."""
    data = Data()
    ratio = data.main_net_buy_amount / data.negotiablemv_daily
    ratio.dropna(how='all', axis=1, inplace=True)

    save_path = r'D:\pythoncode\IndexEnhancement\barra_cne6\download_from_juyuan'
    ratio.to_csv(os.path.join(save_path, 'main_net_buy_ratio.csv'),
                 encoding='gbk')
def form_stock2_second_indus(panel_path, save_path):
    """Aggregate monthly stock weights into SW second-level industry weights.

    :param panel_path: directory containing one CSV of stock weights per month
    :param save_path: directory to write '二级行业权重.csv' to
    """
    data = Data()
    indus_infor = data.secondindustryname
    indus_infor = data.reindex(indus_infor)

    total = pd.DataFrame()
    for fname in os.listdir(panel_path):
        stock_wei = pd.read_csv(os.path.join(panel_path, fname),
                                encoding='gbk', engine='python')
        stock_wei = stock_wei.set_index('wind_code')

        month_key = fname.split('.')[0]
        # Use that month's industry mapping when available, otherwise the
        # most recent mapping column.
        if month_key in indus_infor.columns:
            stock_wei['second_indus'] = indus_infor[month_key]
        else:
            stock_wei['second_indus'] = indus_infor[indus_infor.columns[-1]]

        stock_wei = stock_wei.dropna(axis=0, how='any')
        # Re-normalise to 100 after dropping stocks with no industry mapping.
        stock_wei['i_weight'] = 100 * stock_wei['i_weight'] / np.sum(
            stock_wei['i_weight'])

        indus_wei = stock_wei[['i_weight', 'second_indus']] \
            .groupby('second_indus').sum()
        indus_wei = indus_wei.T
        indus_wei.index = [month_key]
        total = pd.concat([total, indus_wei], axis=0)

    total = total.fillna(0)
    total.to_csv(os.path.join(save_path, '二级行业权重.csv'), encoding='gbk')
def rps_factor(rps_min=50, rps_max=100):
    """Return a boolean RPS-range condition shifted one period for backtesting.

    The mask flags RPS values between rps_min and rps_max (exact boundary
    semantics per float_2_bool_df) and is shifted one column to the right so
    period t uses period t-1's signal.

    Removed dead code: the original also shifted and dropna'd the raw `rps`
    frame after computing the condition, but that result was never used.
    """
    data = Data()
    rps = data.RPS
    rps.fillna(0, inplace=True)
    rps_cond = float_2_bool_df(rps, min_para=rps_min, max_para=rps_max)

    # Shift right one period for backtesting use.
    rps_cond = rps_cond.shift(1, axis=1)
    rps_cond.dropna(axis=1, how='all', inplace=True)
    return rps_cond
def update_index_wei():
    """Incrementally download HS300 / ZZ500 month-end constituent weights from Wind.

    Refactor: the original duplicated the clean-up and per-month download code
    for the two indices; it is now driven by one mapping with identical
    behavior (HS300 is still fetched before ZZ500 each month, and the list of
    new month-ends is still anchored on the stored ZZ500 table).
    """
    w.start()
    data = Data()
    weights = {'000300.SH': data.hs300_wt, '000905.SH': data.zz500_wt}
    mes = generate_months_ends()

    # Drop any stored columns that are not true month-ends.
    for code, wt in weights.items():
        to_del = [c for c in wt.columns if c not in mes]
        if len(to_del) > 0:
            weights[code] = wt.drop(to_del, axis=1)

    new_mes = [m for m in mes if m > weights['000905.SH'].columns[-1]]
    for m in new_mes:
        m_str = m.strftime("%Y-%m-%d")
        for code in weights:
            res = w.wset("indexconstituent",
                         "date=" + m_str + ";windcode=" + code, usedf=True)
            res = res[1]
            res.set_index('wind_code', inplace=True)
            to_add = pd.DataFrame({m: res['i_weight']})
            weights[code] = pd.concat([weights[code], to_add], axis=1)

    data.save(weights['000300.SH'], 'hs300_wt',
              save_path=os.path.join(data_dair, 'index'))
    data.save(weights['000905.SH'], 'zz500_wt',
              save_path=os.path.join(data_dair, 'index'))
def __init__(self, alpha_factor_names=industry_factor_names, update_only=True,
             max_num=20):
    """Set up paths and parameters for SW level-3 industry multi-factor selection."""
    self._data = Data()
    base = os.path.join(root_dair, '行业多因子', '申万三级')
    self.stock_factor_path = os.path.join(root_dair, '因子预处理模块', '因子')
    self.industry_factor_path = os.path.join(base, '因子')
    self.save_path = base
    self.icir_e = None
    self.factors = alpha_factor_names
    self.update_only = update_only
    self.max_num = max_num
    self.indus_selected = None
def from_stock_wei_2_industry_wei(wei_df):
    """Aggregate per-stock weights into SW level-1 industry weights, column by column."""
    data = Data()
    sw_1 = data.stock_basic_inform[['申万一级行业']]

    wei_df = wei_df.fillna(0)
    res = pd.DataFrame()
    for col, se in wei_df.iteritems():
        joined = pd.concat([pd.DataFrame({col: se}), sw_1], axis=1)
        joined.fillna(0, inplace=True)
        grouped = joined.groupby('申万一级行业').sum()
        # Stocks without an industry were filled with 0 — drop that bucket.
        if 0 in grouped.index:
            grouped.drop(0, axis=0, inplace=True)
        res = pd.concat([res, grouped], axis=1)
    return res
def RDtosales(self):
    """R&D expense over operating revenue (2018 onward), month-aligned and appended."""
    data = Data()
    rd = CALFUNC.del_dat_early_than(data.rd_exp, 2018)
    rev = CALFUNC.del_dat_early_than(data.operatingrevenue, 2018)

    res = rd / rev
    res = adjust_months(res)
    res = append_df(res)

    # Drop columns that are more than 90% NaN.
    sparse_cols = res.columns[res.isna().sum() / len(res) > 0.9]
    res.drop(sparse_cols, axis=1, inplace=True)
    return res
def update_pct_monthly():
    """Compute month-end-to-month-end returns from adjusted closes; save as CSV.

    NOTE(review): a later function of the same name (writing to a hard-coded
    path) shadows this one at import time — confirm which is intended.
    """
    data = Data()
    adj_close = data.adjfactor * data.closeprice_daily
    months_ends = generate_months_ends()

    # Keep data from 2006 onward, then keep only month-end columns.
    recent = [c for c in adj_close.columns if c >= datetime(2006, 1, 1)]
    adj_close = adj_close[recent]
    close_me = adj_close[[c for c in adj_close.columns if c in months_ends]]

    pct_monthly_pd = close_me / close_me.shift(1, axis=1) - 1

    history_file = os.path.join(date_dair, 'download_from_juyuan')
    pct_monthly_pd.to_csv(os.path.join(history_file, 'ChangePCT_monthly.csv'),
                          encoding='gbk')
def compute_month_value():
    """Extract month-end PB and PE columns from the daily tables and save them.

    NOTE(review): a later definition with the same name (path built from
    date_dair) shadows this one at import time — confirm which is wanted.
    """
    data = Data()
    basic_path = r'D:\pythoncode\IndexEnhancement\barra_cne6\download_from_juyuan'
    months_ends = generate_months_ends()

    # Same month-end extraction for PB and PE.
    for attr, fname in [('pb_daily', 'pb_monthly.csv'),
                        ('pe_daily', 'pe_monthly.csv')]:
        daily = getattr(data, attr)
        monthly = daily[[m for m in months_ends if m in daily.columns]]
        monthly.to_csv(os.path.join(basic_path, fname), encoding='gbk')
def compute_month_value():
    """Extract month-end PB and PE columns and save under <date_dair>/download_from_juyuan.

    NOTE(review): shadows an earlier same-name definition that used a
    hard-coded path — confirm this later one is the intended version.
    """
    data = Data()
    basic_path = os.path.join(date_dair, 'download_from_juyuan')
    months_ends = generate_months_ends()

    pb_daily = data.pb_daily
    pb_monthly = pb_daily[[m for m in months_ends if m in pb_daily.columns]]
    pb_monthly.to_csv(os.path.join(basic_path, 'pb_monthly.csv'),
                      encoding='gbk')

    pe_daily = data.pe_daily
    pe_monthly = pe_daily[[m for m in months_ends if m in pe_daily.columns]]
    pe_monthly.to_csv(os.path.join(basic_path, 'pe_monthly.csv'),
                      encoding='gbk')
def update_pct_monthly():
    """Compute month-end-to-month-end returns from adjusted closes; save as CSV.

    NOTE(review): duplicates an earlier same-name function that writes under
    date_dair; this hard-coded-path version wins at import time — confirm.
    """
    data = Data()
    close = data.closeprice_daily * data.adjfactor
    months_ends = generate_months_ends()

    # Keep 2006 onward, then restrict to month-end columns.
    recent = [c for c in close.columns if c >= datetime(2006, 1, 1)]
    close = close[recent]
    close_me = close[[c for c in close.columns if c in months_ends]]

    pct = close_me / close_me.shift(1, axis=1) - 1

    basic_path = r'D:\pythoncode\IndexEnhancement\barra_cne6\download_from_juyuan'
    pct.to_csv(os.path.join(basic_path, 'ChangePCT_monthly.csv'),
               encoding='gbk')
def easy_bt(wei_stocks, basic_return_infor):
    """Simple backtest: apply next-day open-to-open returns to given weights.

    Parameters
    ----------
    wei_stocks : stock weight matrix.
    basic_return_infor : CSV path or DataFrame holding a benchmark with a
        'daily_return' or 'net_value' column; anything else disables the
        excess-return computation.

    Returns
    -------
    (daily_return, net_value, cum_excess_df); cum_excess_df is None when no
    usable benchmark is supplied.

    BUG FIX: the original raised NameError/KeyError when the benchmark had
    neither 'daily_return' nor 'net_value'; those cases now yield None.  The
    duplicated excess-return code for the str and DataFrame branches is
    factored into one helper.
    """
    data = Data()
    changepct_daily = data.CHANGEPECT_OPEN_DAILY
    changepct_daily = changepct_daily.shift(-1, axis=1)
    changepct_daily.dropna(how='all', axis=1, inplace=True)
    changepct_daily = changepct_daily / 100

    wei_stocks, changepct_daily = align(wei_stocks, changepct_daily)

    # fee_type='No_fee': ignore commission and stamp duty; 'fee_1': include
    # commission and stamp duty but not market impact.
    daily_return, net_value = back_test(changepct_daily, wei_stocks,
                                        fee_type='fee_1')

    def _cum_excess(benchmark):
        """Cumulative excess return vs a benchmark frame; None if unusable."""
        if 'daily_return' not in benchmark.columns:
            if 'net_value' in benchmark.columns:
                # Derive daily returns from the net-value series.
                benchmark['daily_return'] = (
                    benchmark['net_value'] / benchmark['net_value'].shift(1) - 1)
            else:
                return None
        excess = daily_return['daily_return'] - benchmark['daily_return']
        excess.dropna(inplace=True)
        return pd.DataFrame({'cum_excess_ret': (excess + 1).cumprod()})

    if isinstance(basic_return_infor, str):
        benchmark = pd.read_csv(basic_return_infor, engine='python')
        benchmark = benchmark.set_index('date')
        cum_excess_df = _cum_excess(benchmark)
    elif isinstance(basic_return_infor, pd.DataFrame):
        cum_excess_df = _cum_excess(basic_return_infor)
    else:
        cum_excess_df = None

    return daily_return, net_value, cum_excess_df
def latest_pool(self, method, add_infor=False):
    """Build the latest stock pool and decorate it with names (optionally
    also pool-entry dates and concept tags).

    Returns (res_df, stock_pool), or None when the pool is empty.
    """
    self.get_stock_pool()
    stock_pool = self.rov_of_all_stocks(method)
    if stock_pool.empty:
        return None

    latest = stock_pool[stock_pool.columns[-1]]
    latest = latest[latest.index[latest != 0]]
    latest.sort_values(inplace=True)
    s_list = list(latest.index)

    # Attach stock names.
    data = Data()
    stock_basic = data.stock_basic_inform
    res_df = pd.DataFrame({'SEC_NAME': stock_basic.loc[s_list, 'SEC_NAME']})
    res_df.index.name = 'CODE'
    # res_df.to_csv('D://库存表.csv', encoding='gbk') TODO

    if add_infor:
        # Pool-entry date: the day after the most recent False flag.
        res_df['跳入股票池日期'] = None
        for code, flags in stock_pool.loc[s_list, :].iterrows():
            for i in range(len(flags) - 1, -1, -1):
                if not flags[i]:
                    res_df.loc[code, '跳入股票池日期'] = flags.index[i + 1]
                    break

        # Concept tags, with bracket/quote noise stripped (best-effort).
        concept = data.concept
        res_df = pd.concat([res_df, concept], axis=1, join='inner')
        for code, tags in res_df['CONCEPT'].items():
            try:
                res_df.loc[code, 'CONCEPT'] = tags.replace('[', '').replace(
                    ']', '').replace('\'', '')
            except Exception:
                pass

    return res_df, stock_pool
def keep_industry(stock_pool, to_keep_indus_list, industry_type='sw'):
    """Keep only pool stocks belonging to any of the listed level-1 industries.

    industry_type : 'sw' (申万) or 'zx' (中信) classification.

    BUG FIXES vs the original:
    * an unknown industry_type now raises ValueError instead of a NameError
      on the undefined industry_df;
    * the industries are combined as a UNION — the original set the
      complement of EACH industry to False in turn, so any list with two or
      more industries flagged every stock False;
    * assignment goes through .loc instead of mutating the Series yielded by
      iteritems(), which is not guaranteed to write back.
    """
    data = Data()
    stock_basic = data.stock_basic_inform
    if industry_type == 'sw':
        industry_df = stock_basic[['申万一级行业']]
    elif industry_type == 'zx':
        industry_df = stock_basic[['中信一级行业']]
    else:
        raise ValueError('unknown industry_type: {}'.format(industry_type))

    # Union of pool stocks that belong to any requested industry.
    keep = set()
    for ind in to_keep_indus_list:
        members = industry_df[industry_df[industry_df.columns[0]] == ind].index
        keep.update(i for i in members if i in stock_pool.index)

    # Everything outside the union is excluded across all columns.
    others = list(set(stock_pool.index).difference(keep))
    stock_pool.loc[others, :] = False
    return stock_pool