def compute_future_beta():
    """Compute a 22-observation rolling regression of each futures contract on its index.

    Daily futures closes (``data.sf_close_daily``, one row per contract) and
    the transposed daily index prices (``data.index_price_daily``) are first
    restricted to the dates they share.  Each contract is matched to an index
    by a substring of its code ('IC' -> 'ZZ500', 'IF' -> 'HS300',
    'IH' -> 'SZ50'); contracts with more than 22 non-NaN closes are passed to
    ``rolling_regress_1`` and the result becomes one row of the output, which
    is saved as ``sf_beta`` under ``<data_dair>/index/stock_future``.

    Raises:
        ValueError: if a contract code contains none of 'IC', 'IF' or 'IH'.
    """
    # Original note: stored at D:\Datebase_Stock\Date\index\stock_future\sf_beta.csv
    data = Data()
    futures_close = data.sf_close_daily
    index_prices = data.index_price_daily.T

    # Intersect the dates so that mismatched calendars cannot cause trouble.
    common_dates = sorted(set(futures_close.columns) & set(index_prices.index))
    futures_close = futures_close[common_dates]
    index_prices = index_prices.loc[common_dates, :]

    # Probed in this order, exactly like an if/elif chain.
    underlying_by_tag = (('IC', 'ZZ500'), ('IF', 'HS300'), ('IH', 'SZ50'))

    sf_beta = pd.DataFrame()
    for code, closes in futures_close.iterrows():
        index_name = next(
            (name for tag, name in underlying_by_tag if tag in code), None)
        if index_name is None:
            print('Code Bug')
            raise ValueError
        benchmark = index_prices[index_name]

        # Drop NaNs and align the index series to the surviving dates.
        valid_closes = closes.dropna()
        benchmark = benchmark[valid_closes.index]
        if len(valid_closes) <= 22:
            continue

        beta = rolling_regress_1(benchmark, valid_closes, window=22)
        sf_beta = pd.concat([sf_beta, pd.DataFrame({code: beta}).T], axis=0)

    out_dir = os.path.join(data_dair, 'index', 'stock_future')
    data.save(sf_beta, 'sf_beta', out_dir)
def update_future_price():
    """Refresh the stored daily closes of every index listed in ``NH_index_dict``.

    The download starts 10 days before the last stored date (or on 2010-01-01
    when no stored table can be read) and ends today.  Freshly downloaded rows
    replace stored rows with the same date, and the merged table is saved as
    ``nh_future_price_daily.csv``.
    """
    data = Data()
    try:
        history = data.nh_future_price_daily.T
        st_date = history.index[-1] - timedelta(10)
    except Exception:
        # No usable local history: start from scratch.
        history = pd.DataFrame()
        st_date = datetime(2010, 1, 1)

    start_str = st_date.strftime('%Y%m%d')
    end_str = datetime.today().strftime("%Y%m%d")

    fresh = pd.DataFrame()
    for ts_code in NH_index_dict:
        quotes = pro.index_daily(ts_code=ts_code, start_date=start_str,
                                 end_date=end_str)
        quotes = quotes.set_index('trade_date')
        quotes.index = pd.to_datetime(quotes.index)
        quotes = quotes.sort_index()
        fresh = pd.concat([fresh, pd.DataFrame({ts_code: quotes['close']})],
                          axis=1)

    # Stored rows that were downloaded again are superseded by the new data.
    overlap = [day for day in fresh.index if day in history.index]
    history = history.drop(overlap, axis=0)
    history = pd.concat([history, fresh], axis=0)

    data.save(history, 'nh_future_price_daily.csv',
              save_path=r'D:\commodity_datebase\price_data')
def update_f_data_from_wind(special_year=2015):
    """Download per-stock indicator series through ``w.wsd`` and merge them into local files.

    For every ``StructWind`` item the locally stored table (looked up on
    ``Data`` by the lower-cased item name) is extended with the downloaded
    columns, the column labels are converted to datetimes and sorted, and the
    table is saved under ``<data_dair>/download_from_juyuan``.

    Args:
        special_year: when truthy, the request covers the first to the last
            entry of ``generate_months_ends()`` that falls in this year.
            When falsy, the request runs from the last locally stored column
            (2009-01-01 if nothing is stored) to today, and is skipped when
            the local data is younger than 110 days (period 'Q') or 20 days
            (period 'M').
    """
    path = os.path.join(data_dair, 'download_from_juyuan')
    w.start()
    data = Data()
    stock_basic_inform = data.stock_basic_inform
    mes = generate_months_ends()

    iterms = [
        # StructWind('rd_exp', 'Q', 'unit=1;rptType=1;Days=Alldays'),
        StructWind('west_netprofit_YOY', 'M')
    ]

    codes_str = ','.join(stock_basic_inform.index)
    today = datetime.today()

    for it in iterms:
        name = it.name
        period = it.period
        other = it.other

        try:
            # Plain attribute lookup; no need to eval a constructed expression.
            tmp_df = getattr(data, name.lower())
            last_dt = tmp_df.columns[-1]
            tds = last_dt.strftime("%Y-%m-%d")
        except Exception:
            # Best effort: any failure to read local data means "start over".
            tmp_df = pd.DataFrame()
            last_dt = datetime(2009, 1, 1)
            tds = last_dt.strftime("%Y-%m-%d")

        # e.g. "unit=1;rptType=1;Period=Q;Days=Alldays"
        if other:
            oth = 'Period=' + period + ';' + other
        else:
            oth = 'Period=' + period

        if special_year:
            # NOTE(review): columns already stored for this year are not
            # removed before the concat below - confirm duplicates are OK.
            mes0 = [m for m in mes if m.year == special_year]
            tds = mes0[0].strftime("%Y-%m-%d")
            eds = mes0[-1].strftime("%Y-%m-%d")
        else:
            eds = today.strftime("%Y-%m-%d")
            # Compare datetimes, not the formatted string: subtracting the
            # string from a datetime raises TypeError.
            age_days = (today - last_dt).days
            if period == 'Q' and age_days < 110:
                continue
            if period == 'M' and age_days < 20:
                continue

        res_tmp = w.wsd(codes_str, name, tds, eds, oth, usedf=True)
        res_tmp1 = res_tmp[1].T

        tmp_df = pd.concat([tmp_df, res_tmp1], axis=1)

        # Local data and Wind output may use different date formats; unify
        # them so that the columns can be sorted.
        tmp_df.columns = pd.to_datetime(tmp_df.columns)
        tmp_df = tmp_df[sorted(tmp_df.columns)]
        data.save(tmp_df, name.lower(), save_path=path)
def compute_changepect_open_daily():
    """Save the percentage change between consecutive adjusted open prices.

    Open prices are multiplied by the adjustment factor, all-NaN columns are
    dropped, and each column is divided by the column to its left.  The
    result, ``(ratio - 1) * 100`` with all-NaN columns removed, is saved as
    ``CHANGEPECT_OPEN_DAILY``.
    """
    data = Data()
    adjusted_open = data.openprice_daily * data.adjfactor
    adjusted_open = adjusted_open.dropna(axis=1, how='all')

    previous_open = adjusted_open.shift(1, axis=1)
    pct_change = (adjusted_open / previous_open - 1) * 100
    pct_change = pct_change.dropna(how='all', axis=1)

    data.save(pct_change, 'changepect_open_daily'.upper())
def update_futmap():
    """Extend the stored IF / IH / IC contract mapping table up to today.

    The download starts at the last stored date (2009-01-01 when no stored
    table can be read).  Each product's mapping from ``pro.fut_mapping``
    becomes one column indexed by trade date, the '.CFX' suffix of every
    string value is rewritten to '.CFE', dates that are already stored are
    discarded, and the merged table is saved as ``fut_map`` under
    ``<date_dair>/index``.
    """
    data = Data()
    try:
        fut_map = data.fut_map.T
        start_d = fut_map.index[-1].strftime("%Y%m%d")
    except Exception:
        fut_map = pd.DataFrame()
        start_d = datetime(2009, 1, 1).strftime("%Y%m%d")
    end_d = datetime.today().strftime("%Y%m%d")

    products = (('IF', 'IF.CFX'), ('IH', 'IH.CFX'), ('IC', 'IC.CFX'))

    # Fetch all three mappings first, then reshape them.
    downloaded = [
        (label, pro.fut_mapping(ts_code=ts_code, start_date=start_d,
                                end_date=end_d))
        for label, ts_code in products
    ]

    columns = []
    for label, mapping in downloaded:
        mapping = mapping.set_index('trade_date').drop('ts_code', axis=1)
        mapping.columns = [label]
        mapping.index = pd.to_datetime(mapping.index)
        columns.append(mapping.sort_index())

    domain_fut = pd.concat(columns, axis=1)
    domain_fut = domain_fut.applymap(
        lambda x: x.split('.CFX')[0] + '.CFE' if isinstance(x, str) else x)

    # Keep the stored row for any date that is already present.
    already_stored = [day for day in domain_fut.index if day in fut_map.index]
    domain_fut = domain_fut.drop(already_stored, axis=0)

    fut_map = pd.concat([fut_map, domain_fut], axis=0)
    data.save(fut_map, 'fut_map', save_path=os.path.join(date_dair, 'index'))
    print('期货主力合约映射表下载完毕')
def update_daily_basic():
    """Bring the daily PB, PE, turnover and market-value tables up to yesterday.

    The update starts at the earliest "last column" among the five stored
    tables.  For every trading day in range, ``pro.daily_basic`` is queried
    once and each field is merged into its table through ``insert_to_df``.
    All five tables are then saved under ``<date_dair>/download_from_juyuan``.

    Returns:
        None.  Returns early (after printing a message) when there is no
        trading day to update.
    """
    data = Data()
    # API field -> stored table, in the order the fields are merged.
    tables = {
        'pb': data.pb_daily,
        'pe': data.pe_daily,
        'turnover_rate': data.turnoverrate_daily,
        'total_mv': data.totalmv_daily,        # total market value (10k yuan)
        'circ_mv': data.negotiablemv_daily,    # negotiable market value (10k yuan)
    }

    st = np.min([
        tables['pb'].columns[-1], tables['pe'].columns[-1],
        tables['turnover_rate'].columns[-1], tables['circ_mv'].columns[-1],
        tables['total_mv'].columns[-1]
    ])
    ed = datetime.today() - timedelta(1)

    days_to_update = [d for d in trade_days() if st <= d <= ed]
    if not days_to_update:
        print('日度指标数据:已经更新到最新数据,无需更新,自动退出')
        return None

    for day in days_to_update:
        snapshot = pro.daily_basic(
            ts_code='',
            trade_date=day.strftime("%Y%m%d"),
            fields='ts_code,turnover_rate,pe,pb,total_mv,circ_mv')
        snapshot = snapshot.set_index('ts_code').sort_index()

        for field in ('pb', 'pe', 'turnover_rate', 'total_mv', 'circ_mv'):
            tables[field] = insert_to_df(
                tables[field], pd.DataFrame({day: snapshot[field]}))
        # Pause between requests.
        sleep(0.33)

    history_file = os.path.join(date_dair, 'download_from_juyuan')
    outputs = (
        ('pb', 'pb_daily.csv'),
        ('pe', 'pe_daily.csv'),
        ('circ_mv', 'NegotiableMV_daily.csv'),
        ('total_mv', 'TotalMV_daily.csv'),
        ('turnover_rate', 'TurnoverRate_daily.csv'),
    )
    for field, file_name in outputs:
        data.save(tables[field], file_name, save_path=history_file)
def update_index_wei():
    """Append missing month-end constituent weights for HS300 and ZZ500.

    For each index the stored weight table is first stripped of columns that
    are not in ``generate_months_ends()``.  Every month end later than that
    table's own last column is then requested from Wind
    (``w.wset("indexconstituent", ...)``) and appended as a new column of
    ``i_weight`` values indexed by ``wind_code``.  Both tables are saved
    under ``<data_dair>/index`` once all downloads have finished.

    The months to download are determined per index, so a table that is
    ahead of or behind the other one gets neither duplicate nor missing
    columns.
    """
    w.start()
    data = Data()
    mes = generate_months_ends()

    # (stored name, Wind code, stored table); saved in this order.
    targets = (
        ('hs300_wt', '000300.SH', data.hs300_wt),
        ('zz500_wt', '000905.SH', data.zz500_wt),
    )

    updated = []
    for name, windcode, weights in targets:
        # Remove data that is not on a month end first.
        to_del = [c for c in weights.columns if c not in mes]
        if to_del:
            weights = weights.drop(to_del, axis=1)

        last_stored = weights.columns[-1]
        for m in mes:
            if not m > last_stored:
                continue
            m_str = m.strftime("%Y-%m-%d")
            res = w.wset("indexconstituent",
                         "date=" + m_str + ";windcode=" + windcode,
                         usedf=True)
            res = res[1]
            res.set_index('wind_code', inplace=True)
            to_add = pd.DataFrame({m: res['i_weight']})
            weights = pd.concat([weights, to_add], axis=1)

        updated.append((name, weights))

    # Save only after every download succeeded, so a failure leaves both
    # stored files untouched.
    save_dir = os.path.join(data_dair, 'index')
    for name, weights in updated:
        data.save(weights, name, save_path=save_dir)
def update_each_future():
    """Incrementally update daily open/close/vol/amount/oi tables of CFFEX index futures.

    Contracts are taken from ``pro.fut_basic(exchange='CFFEX')``; rows without
    a list date are dropped and only symbols containing 'IF', 'IH' or 'IC'
    are kept.  Contracts already present in the stored tables and not yet
    delisted are refreshed from the last stored date up to yesterday;
    contracts not yet stored are downloaded from their list date.  The five
    tables are saved under ``<date_dair>/index/stock_future``.
    """
    # 1. Download all daily price/volume information.
    data = Data()

    # Codes, list dates and delist dates of all historical contracts.
    his_df = pro.fut_basic(
        exchange='CFFEX',
        fields='ts_code,symbol,fut_code,name,list_date,delist_date')
    # Drop entries without a list date (continuous contracts and the like).
    his_df.drop(his_df.index[pd.isna(his_df['list_date'])], axis=0,
                inplace=True)
    # Keep stock-index futures, drop treasury-bond futures.
    save_l = [i for i, v in his_df['symbol'].items()
              if 'IF' in v or 'IH' in v or 'IC' in v]
    his_df = his_df.loc[save_l, :]
    his_df = his_df.drop(['symbol'], axis=1)
    his_df = his_df.set_index('ts_code')
    his_df = his_df.sort_values("list_date")

    # (fut_daily field, stored table name), in save order.
    series = (
        ('open', 'sf_open_daily'),
        ('close', 'sf_close_daily'),
        ('vol', 'sf_vol_daily'),
        ('amount', 'sf_amount_daily'),
        ('oi', 'sf_oi_daily'),
    )

    try:
        frames = {
            'open': data.sf_open_daily,
            'close': data.sf_close_daily,
            'vol': data.sf_vol_daily,
            'amount': data.sf_amount_daily,
            'oi': data.sf_oi_daily,
        }
        # Format the last stored date like every other date handed to the
        # API ('%Y%m%d') instead of passing the raw column label.
        start_d = pd.to_datetime(
            frames['open'].columns[-1]).strftime("%Y%m%d")
    except Exception:
        # No usable local history: rebuild everything.
        frames = {field: pd.DataFrame() for field, _ in series}
        start_d = datetime(2009, 1, 1).strftime("%Y%m%d")

    known = set(frames['open'].index)
    # Contracts that are not stored yet.
    new_contracts = [c for c in his_df.index if c not in known]
    # Contracts that are already stored.
    his_contracts = [c for c in his_df.index if c in known]

    ed = (datetime.today() - timedelta(1)).strftime("%Y%m%d")

    # TODO (from the original author): verify this branch.
    for i, se in his_df.loc[his_contracts, :].iterrows():
        if datetime.strptime(se['delist_date'], "%Y%m%d") < datetime.today():
            # Contract already delisted: nothing new to fetch.
            continue

        res_tmp = pro.fut_daily(ts_code=i, start_date=start_d, end_date=ed)
        res_tmp.set_index('trade_date', inplace=True)
        res_tmp.index = pd.to_datetime(res_tmp.index)
        res_tmp.sort_index(inplace=True)

        # NOTE(review): relies on .loc creating columns for dates that are
        # not stored yet - confirm with the installed pandas version.
        for field, _ in series:
            frames[field].loc[i, res_tmp.index] = res_tmp[field]

        # The API allows at most 20 calls per minute.
        sleep(3)

    # Newly listed contracts are appended as new rows.
    for i, se in his_df.loc[new_contracts, :].iterrows():
        res_tmp = pro.fut_daily(ts_code=i, start_date=se['list_date'],
                                end_date=ed)
        res_tmp.set_index('trade_date', inplace=True)
        res_tmp.index = pd.to_datetime(res_tmp.index)
        res_tmp.sort_index(inplace=True)

        for field, _ in series:
            frames[field] = pd.concat(
                [frames[field], pd.DataFrame({i: res_tmp[field]}).T], axis=0)

        # The API allows at most 20 calls per minute.
        sleep(3)

    p = os.path.join(date_dair, 'index', 'stock_future')
    for field, table_name in series:
        data.save(frames[field], table_name, save_path=p)

    print('期货合约价量数据下载完毕')
def update_stock_daily_price():
    """Bring the daily stock quote tables (OHLC, pct change, volume, value) up to date.

    The update starts at the earliest "last column" among the seven stored
    tables and ends yesterday when run before 16:00, otherwise today.  For
    every trading day in range ``pro.daily`` is queried once and each field
    is merged into its table through ``insert_to_df``; all tables are then
    saved under ``<date_dair>/download_from_juyuan``.

    Returns:
        None.  Returns early (after printing a message) when there is no
        trading day to update.
    """
    data = Data()
    openprice_pd = data.openprice_daily
    highprice_pd = data.highprice_daily
    closeprice_pd = data.closeprice_daily
    lowprice_pd = data.lowprice_daily
    changePCT_pd = data.changepct_daily

    t_volume = data.turnovervolume_daily   # turnover volume
    t_value = data.turnovervalue_daily     # turnover value (10k yuan)

    st = np.min([
        openprice_pd.columns[-1], highprice_pd.columns[-1],
        closeprice_pd.columns[-1], lowprice_pd.columns[-1],
        changePCT_pd.columns[-1], t_volume.columns[-1], t_value.columns[-1]
    ])

    if datetime.today().hour < 16:
        ed = datetime.today() - timedelta(1)
    else:
        ed = datetime.today()

    tds = trade_days()
    days_to_update = [d for d in tds if st <= d <= ed]
    if not days_to_update:
        print('日度行情数据:已经更新到最新数据,无需更新,自动退出')
        return None

    for d in days_to_update:
        d_str = d.strftime("%Y%m%d")
        df_tmp = pro.daily(trade_date=d_str)
        df_tmp = df_tmp.set_index('ts_code')

        openprice_pd = insert_to_df(openprice_pd,
                                    pd.DataFrame({d: df_tmp['open']}))
        highprice_pd = insert_to_df(highprice_pd,
                                    pd.DataFrame({d: df_tmp['high']}))
        lowprice_pd = insert_to_df(lowprice_pd,
                                   pd.DataFrame({d: df_tmp['low']}))
        closeprice_pd = insert_to_df(closeprice_pd,
                                     pd.DataFrame({d: df_tmp['close']}))
        changePCT_pd = insert_to_df(changePCT_pd,
                                    pd.DataFrame({d: df_tmp['pct_chg']}))
        # The API reports lots; the stored table uses units of 10k shares.
        t_volume = insert_to_df(t_volume,
                                pd.DataFrame({d: df_tmp['vol']}) / 100)
        # The API reports thousands; the stored table uses units of 10k.
        t_value = insert_to_df(t_value,
                               pd.DataFrame({d: df_tmp['amount']}) / 10)

    # Each table is written to the file that matches its content; low and
    # close must not be crossed, or both stored files are corrupted.
    outputs = (
        (openprice_pd, 'OpenPrice_daily.csv'),
        (highprice_pd, 'HighPrice_daily.csv'),
        (lowprice_pd, 'LowPrice_daily.csv'),
        (closeprice_pd, 'ClosePrice_daily.csv'),
        (changePCT_pd, 'ChangePCT_daily.csv'),
        (t_volume, 'TurnoverVolume_daily.csv'),
        (t_value, 'TurnoverValue_daily.csv'),
    )

    history_file = os.path.join(date_dair, 'download_from_juyuan')
    for table, file_name in outputs:
        table.index.name = 'Code'
        data.save(table, file_name, save_path=history_file)
def update_stock_future_dat():
    """Download the full daily history of every CFFEX stock-index futures contract.

    Contracts come from ``pro.fut_basic(exchange='CFFEX')``; rows without a
    list date are dropped and only symbols containing 'IF', 'IH' or 'IC' are
    kept.  Each contract is downloaded from its list date to its delist date
    (or to yesterday when the delist date is still in the future) and added
    as one row to the open / close / vol / amount / oi tables, which are then
    saved to a fixed local directory.
    """
    # 1. Download all daily price/volume information.
    data = Data()

    # Codes, list dates and delist dates of all historical contracts.
    his_df = pro.fut_basic(
        exchange='CFFEX',
        fields='ts_code,symbol,fut_code,name,list_date,delist_date')
    # Drop entries without a list date (continuous contracts and the like).
    missing_list_date = his_df.index[pd.isna(his_df['list_date'])]
    his_df.drop(missing_list_date, axis=0, inplace=True)

    # Keep stock-index futures, drop treasury-bond futures.
    keep = [label for label, symbol in his_df['symbol'].items()
            if 'IF' in symbol or 'IH' in symbol or 'IC' in symbol]
    his_df = his_df.loc[keep, :]
    his_df = his_df.drop(['symbol'], axis=1)
    his_df = his_df.sort_values("list_date")
    his_df.index = range(0, len(his_df))

    # (fut_daily field, stored table name), in save order.
    series = (
        ('open', 'sf_open_daily'),
        ('close', 'sf_close_daily'),
        ('vol', 'sf_vol_daily'),
        ('amount', 'sf_amount_daily'),
        ('oi', 'sf_oi_daily'),
    )
    tables = {field: pd.DataFrame() for field, _ in series}

    for _, contract in his_df.iterrows():
        # Decide the end date of the request.
        still_listed = (datetime.strptime(contract['delist_date'], "%Y%m%d")
                        > datetime.today())
        if still_listed:
            ed = (datetime.today() - timedelta(1)).strftime("%Y%m%d")
        else:
            ed = contract['delist_date']

        quotes = pro.fut_daily(ts_code=contract['ts_code'],
                               start_date=contract['list_date'], end_date=ed)
        quotes.set_index('trade_date', inplace=True)
        quotes.index = pd.to_datetime(quotes.index)
        quotes.sort_index(inplace=True)

        for field, _ in series:
            row = pd.DataFrame({contract['ts_code']: quotes[field]}).T
            tables[field] = pd.concat([tables[field], row], axis=0)

        # The API allows at most 20 calls per minute.
        sleep(3)

    p = r'D:\pythoncode\IndexEnhancement\指数相关\history_data'
    for field, table_name in series:
        data.save(tables[field], table_name, save_path=p)

    # A commented-out download of the IF/IH/IC contract mapping (fut_map)
    # used to follow here; update_futmap() performs that download.