def update_strategy_index(date_from_str, date_to_str): """ strategy_index_info 中所有 strategy_name 更新指数净值到数据库 strategy_index_val 中 :param date_from_str: 起始日期 %Y-%m-%d :param date_to_str: 截止日期 %Y-%m-%d :return: """ engine = get_db_engine() with get_db_session(engine) as session: stg_table = session.execute( 'select strategy_name from strategy_index_info group by strategy_name' ) strategy_name_list = [stg_info[0] for stg_info in stg_table.fetchall()] strategy_name_count = len(strategy_name_list) if strategy_name_count == 0: logger.info('strategy_index_info table is empty') return # strategy_name_list = ['long_only', 'cta', 'arbitrage', 'alpha', 'macro'] date_from = datetime.strptime(date_from_str, '%Y-%m-%d').date() date_to = datetime.strptime(date_to_str, '%Y-%m-%d').date() index_df_list = [] for strategy_name in strategy_name_list: # strategy_name = 'long_only' # index_df = calc_strategy_index(strategy_name, date_from, date_to, calc_sample_name='main') stg_index_s = get_strategy_index_by_name(strategy_name, date_from, date_to, statistic=False) if stg_index_s is not None: logger.info('生成%s策略指数【%s ~ %s】', strategy_name, stg_index_s.index[0], stg_index_s.index[-1]) # index_df.to_csv('%s_sample_%s_%s.csv' % (strategy_name, date_from, date_to)) index_df = pd.DataFrame({'value': stg_index_s}) index_df.index.rename('nav_date', inplace=True) index_df.reset_index(inplace=True) # index_df.rename(columns={'nav_date_week': 'nav_date', strategy_name: 'value'}, inplace=True) index_df['index_name'] = strategy_name index_df_list.append(index_df) else: logger.info('No Data for shown on %s', strategy_name) index_df_all = pd.concat(index_df_list) index_df_all.set_index(['index_name', 'nav_date'], inplace=True) # 重置内容 table_name = 'strategy_index_val' with get_db_session(engine) as session: # session.execute("delete from %s where nav_date between '%s' and '%s'" % (table_name, date_from_str, date_to_str)) session.execute("truncate table %s" % table_name) index_df_all.to_sql(table_name, engine, if_exists='append', dtype={ 'index_name': String(20), 'nav_date': Date, 'value': FLOAT, })
def save_fund_info(fund_info_df, get_df=False, mode='replace_insert'): table_name = 'wind_fund_info' engine = get_db_engine() fund_info_df.rename(columns={ 'SEC_NAME': 'sec_name', 'FUND_SETUPDATE': 'fund_setupdate', 'FUND_MATURITYDATE': 'fund_maturitydate', 'FUND_MGRCOMP': 'fund_mgrcomp', 'FUND_EXISTINGYEAR': 'fund_existingyear', 'FUND_PTMYEAR': 'fund_ptmyear', 'FUND_TYPE': 'fund_type', 'FUND_FUNDMANAGER': 'fund_fundmanager' }, inplace=True) fund_info_df.index.names = ['wind_code'] fund_info_df.drop_duplicates(inplace=True) if mode == 'append': fund_info_df.to_sql(table_name, engine, if_exists='append', dtype={ 'wind_code': String(200), 'sec_name': String(200), 'strategy_type': String(200), 'fund_setupdate': Date, 'fund_maturitydate': Date }) elif mode == 'replace_insert': fund_info_df.reset_index(inplace=True) data_list = list(fund_info_df.T.to_dict().values()) sql_str = """REPLACE INTO wind_fund_info (wind_code,sec_name,strategy_type,fund_setupdate,fund_maturitydate,fund_mgrcomp,fund_existingyear,fund_ptmyear,fund_type,fund_fundmanager) VALUES (:wind_code,:sec_name,:strategy_type,:fund_setupdate,:fund_maturitydate,:fund_mgrcomp,:fund_existingyear,:fund_ptmyear,:fund_type,:fund_fundmanager); """ with get_db_session() as session: session.execute(sql_str, data_list) else: raise ValueError('mode="%s" is not available' % mode) logging.info('%d funds inserted' % fund_info_df.shape[0]) # wind_fund_info 表中增量数据插入到 fund_info sql_str = """insert into fund_info(wind_code, sec_name, strategy_type, fund_setupdate, fund_maturitydate, fund_mgrcomp, fund_existingyear, fund_ptmyear, fund_type, fund_fundmanager) select wfi.wind_code, wfi.sec_name, wfi.strategy_type, wfi.fund_setupdate, wfi.fund_maturitydate, wfi.fund_mgrcomp, wfi.fund_existingyear, wfi.fund_ptmyear, wfi.fund_type, wfi.fund_fundmanager from wind_fund_info wfi left outer join fund_info fi on wfi.wind_code=fi.wind_code where fi.wind_code is null""" with get_db_session(engine) as session: table = session.execute(sql_str) logging.info( 'new data was inserted into fund_info from wind_fund_info table') return fund_info_df if get_df else None
def import_trade_date(): """ 增量导入交易日数据导数据库表 wind_trade_date,默认导入未来300天的交易日数据 :return: """ w = WindRest(WIND_REST_URL) engine = get_db_engine() trade_date_start = None with get_db_session(engine) as session: try: trade_date_max = session.execute( 'select max(trade_date) from wind_trade_date').first()[0] if trade_date_max is not None: trade_date_start = ( trade_date_max + timedelta(days=1)).strftime(STR_FORMAT_DATE) except Exception as exp: logging.exception("交易日获取异常") if trade_date_start is None: trade_date_start = '1990-01-01' end_date_str = (date.today() + timedelta(days=310)).strftime(STR_FORMAT_DATE) date_df = w.tdays(trade_date_start, end_date_str) if date_df is not None: print(date_df.shape) date_df = date_df.set_index('date').rename( columns={'date': 'trade_date'}) date_df.to_sql('wind_trade_date', engine, if_exists='append') logging.info('%d trade date has been imported', date_df.shape[0])
def get_fund_exposure_his(wind_code, period_count=12): # 链接数据库,并获取fundnav旧表 with get_db_session() as session: # 获取 nav_date 列表 nav_date_table = session.execute( 'select nav_date from fund_nav, (select trade_date from stock_facexposure group by trade_date) as td where wind_code=:code and td.trade_date=fund_nav.nav_date group by nav_date order by nav_date', {'code': wind_code}) nav_date_list = [nav_date[0] for nav_date in nav_date_table.fetchall()] nav_date_count = len(nav_date_list) logger.info('nav_date_count:{}'.format(nav_date_count)) if nav_date_count > 0: fund_exposure_df_list = [] for n in range(max([0, nav_date_count - period_count]), nav_date_count - 6): nav_date_str1 = nav_date_list[n - 1].strftime(STR_FORMAT_DATE) nav_date_str = nav_date_list[n].strftime(STR_FORMAT_DATE) logger.info('calc cal_perform_attrib(%s, %s, %s)', wind_code, nav_date_str1, nav_date_str) fund_exposure_df = cal_perform_attrib(wind_code, nav_date_str1, nav_date_str) # '2016-01-04', '2016-01-29') if fund_exposure_df is None: continue fund_exposure_df_list.append( fund_exposure_df.rename(columns={'因子暴露': nav_date_str})) if len(fund_exposure_df_list) != 0: fund_exposure_his_df = pd.concat(fund_exposure_df_list, axis=1) else: fund_exposure_his_df = None return fund_exposure_his_df else: return None
def fill_wind_index_daily_col(): with get_db_session() as session: sql_str = "select wind_code, min(trade_date), max(trade_date) from wind_index_daily group by wind_code" table = session.execute(sql_str) wind_date_dic = { content[0]: (content[1], content[2]) for content in table.fetchall() } for wind_code, date_pair in wind_date_dic.items(): logger.debug('invoke wsd for %s between %s and %s', wind_code, date_pair[0], date_pair[1]) data_df = rest.wsd(wind_code, "turn,free_turn", date_pair[0], date_pair[1], "") data_df.dropna(inplace=True) if data_df.shape[0] == 0: continue logger.debug('%d data importing for %s', data_df.shape[0], wind_code) data_df['WIND_CODE'] = wind_code data_df.index.rename('TRADE_DATE', inplace=True) data_df.reset_index(inplace=True) data_list = list(data_df.T.to_dict().values()) sql_str = """update wind_index_daily set turn=:TURN, free_turn=:FREE_TURN where wind_code= :WIND_CODE and trade_date = :TRADE_DATE""" session.execute(sql_str, params=data_list)
def get_trade_code_wind_code_dic(): global TRADE_CODE_WIND_CODE_DIC if TRADE_CODE_WIND_CODE_DIC is None: with get_db_session() as session: table = session.execute( 'select trade_code, wind_code from wind_stock_info') TRADE_CODE_WIND_CODE_DIC = dict(table.fetchall()) # print('name_code_dic:\n', name_code_dic) return TRADE_CODE_WIND_CODE_DIC
def get_stock_name_code_dic(): global NAME_CODE_DIC if NAME_CODE_DIC is None: with get_db_session() as session: table = session.execute( 'select sec_name, wind_code from wind_stock_info') NAME_CODE_DIC = dict(table.fetchall()) # print('name_code_dic:\n', name_code_dic) return NAME_CODE_DIC
def update_trade_date_latest(wind_code_trade_date_latest): params = [{ 'wind_code': wind_code, 'trade_date_latest': trade_date_latest } for wind_code, trade_date_latest in wind_code_trade_date_latest.items()] with get_db_session() as session: session.execute( 'update fund_info set trade_date_latest = :trade_date_latest where wind_code = :wind_code', params) logger.info('%d funds update latest trade date', len(wind_code_trade_date_latest))
def update_factors(): rest = WindRest(WIND_REST_URL) sql_str = 'SELECT max(Trade_Date) as trade_date_latest FROM stock_facexposure' with get_db_session() as session: trade_date_latest = session.execute(sql_str).fetchone()[0] date_start_str = rest.tdaysoffset(1, trade_date_latest) date_today_str = date.today().strftime('%Y-%m-%d') date_end_str = rest.tdaysoffset(-1, date_today_str) if datetime.strptime(date_start_str, STR_FORMAT_DATE) <= datetime.strptime(date_end_str, STR_FORMAT_DATE): save_factor_explore(date_start_str, date_end_str)
def get_fund_nav_weekly_by_strategy(strategy_type_en, date_from, date_to, show_fund_name=False, do_filter_wind_code=False): """ 输入策略代码,起止日期,返回该策略所有基金周净值 :param strategy_type_en: :param date_from: :param date_to: :param show_fund_name: :return: """ global STRATEGY_TYPE_EN_CN_DIC sql_str = """select fv.wind_code, nav_date_week, fv.nav_acc from ( select wind_code, adddate(nav_date, 4 - weekday(nav_date)) as nav_date_week, max(nav_date) as nav_date_max from fund_nav where wind_code in (select wind_code from fund_info where strategy_type = '%s') group by wind_code, nav_date_week having nav_date_week between '%s' and '%s' ) as ffv, fund_nav fv where ffv.nav_date_week between '%s' and '%s' and fv.wind_code = ffv.wind_code and fv.nav_date = ffv.nav_date_max group by wind_code, nav_date_week order by nav_date_week desc""" strategy_name_cn = STRATEGY_TYPE_EN_CN_DIC[strategy_type_en] query_str = sql_str % (strategy_name_cn, date_from, date_to, date_from, date_to) # logger.debug('策略子基金净值查询sql:\n%s', query_str) engine = get_db_engine() data_df = pd.read_sql_query(query_str, engine) fund_nav_df = data_df.pivot(index='nav_date_week', columns='wind_code', values='nav_acc') # 筛选子基金列表 if do_filter_wind_code: fund_nav_df = filter_wind_code(fund_nav_df, strategy_type_en) if show_fund_name: # 获取样本子基金名称 sql_str = "select wind_code, sec_name from fund_info where wind_code in (%s)" wind_code_list_str = ', '.join( ["'" + wind_code + "'" for wind_code in list(fund_nav_df.columns)]) query_str = sql_str % wind_code_list_str with get_db_session(engine) as session: table = session.execute(query_str) fund_code_name_dic = dict(table.fetchall()) # logger.info(df_fund_nav) fund_nav_df.rename(columns=fund_code_name_dic, inplace=True) return fund_nav_df
def update_fundnav_by_csv2(file_path, mode='delete_insert'): df = pd.read_csv(file_path) df.set_index('nav_date_week', inplace=True) df_fund = df.unstack().reset_index().dropna() col_name_list = list(df_fund.columns) df_fund.rename(columns={ col_name_list[0]: 'wind_code', col_name_list[1]: 'nav_date', col_name_list[2]: 'nav' }, inplace=True) df_fund[['trade_date', 'nav_acc']] = df_fund[['nav_date', 'nav']] if mode == 'delete_insert': df_fund.set_index(['wind_code', 'trade_date'], inplace=True) # df_fund['nav_acc'] = df_fund['nav'] table_name = 'fund_nav_tmp' sql_str = 'delete from fund_nav_tmp where wind_code in (%s)' % ( "'" + "', '".join(df.columns) + "'") engine = get_db_engine() with get_db_session(engine) as session: session.execute(sql_str) df_fund.to_sql(table_name, engine, if_exists='append', dtype={ 'wind_code': String(20), 'trade_date': Date, 'nav_date': Date, 'nav': FLOAT, 'nav_acc': FLOAT, }) elif mode == 'replace_insert': data_list = list(df_fund.T.to_dict().values()) sql_str = "REPLACE INTO fund_nav_tmp (wind_code, trade_date, nav, nav_acc, nav_date) values (:wind_code, :trade_date, :nav, :nav_acc, :nav_date)" with get_db_session() as session: session.execute(sql_str, data_list) pass else: raise ValueError('mode="%s" is not available' % mode)
def update_fund_mgrcomp_info(): """ fund_info 表 及 fund_nav表更新完成后,更新及插入 fund_mgrcomp_info 表相关统计信息 :return: """ with get_db_session() as session: # 对已存在数据更新 基金统计数据 sql_str = """update fund_mgrcomp_info fmi, ( select fi.fund_mgrcomp, fi.fund_maturitydate, count(*) fund_count_tot, sum(if(fi.fund_maturitydate is null or fi.fund_maturitydate <= '1900-01-01' or fi.fund_maturitydate > curdate(),1,0)) fund_count_existing, sum(if(datediff(curdate(), ifnull(fn.nav_date_max,'1900-01-01'))<30, 1, 0)) fund_count_active from fund_info fi left outer join (select wind_code, max(nav_date) nav_date_max from fund_nav group by wind_code) fn on fi.wind_code = fn.wind_code where fi.fund_mgrcomp is not null and fi.fund_mgrcomp not like '%%*%%' group by fi.fund_mgrcomp ) fmi_new set fmi.fund_count_tot = fmi_new.fund_count_tot, fmi.fund_count_existing = fmi_new.fund_count_existing, fmi.fund_count_active = fmi_new.fund_count_active where fmi.name = fmi_new.fund_mgrcomp""" session.execute(sql_str) logger.info('update fund_mgrcomp_info finished') # 添加新基金管理人信息及统计数据 sql_str = """insert into fund_mgrcomp_info(name, review_status, fund_count_tot,fund_count_existing,fund_count_active) select fi.fund_mgrcomp, 0, count(*) fund_count_tot, sum(if(fi.fund_maturitydate is null or fi.fund_maturitydate <= '1900-01-01' or fi.fund_maturitydate > curdate(),1,0)) fund_count_existing, sum(if(datediff(curdate(), ifnull(fn.nav_date_max,'1900-01-01'))<30, 1, 0)) fund_count_active from fund_info fi left outer join (select wind_code, max(nav_date) nav_date_max from fund_nav group by wind_code) fn on fi.wind_code = fn.wind_code where fi.fund_mgrcomp is not null and fi.fund_mgrcomp not like '%*%' group by fi.fund_mgrcomp having fi.fund_mgrcomp not in (select name from fund_mgrcomp_info)""" session.execute(sql_str) logger.info('insert new fund_mgrcomp into fund_mgrcomp_info finished') sql_str = """update fund_info fi, fund_mgrcomp_info fmi set fi.mgrcomp_id = fmi.mgrcomp_id where fi.fund_mgrcomp = fmi.name and fi.mgrcomp_id is null""" session.execute(sql_str) logger.info('update mgrcomp_id on fund_mgrcomp_info finished')
def get_fund_calc_info(wind_code_s): """获取 fund_essential_info 相关基金要素信息,费率、费率计算模式、初始规模、初始金额等数据""" sql_str = """SELECT wind_code_s,wind_code,sec_name_s,date_start,date_end,warning_line,winding_line,closed_period, investment_scope,investment_restrictions,open_frequency,bonus_mode,subscribe_threshold,redemption_threshold, day_count_4_calc_fee,manage_fee_rate,manage_fee_calc_mode,custodian_fee_rate,custodian_fee_calc_mode, admin_fee_rate,admin_fee_calc_mode,storage_fee_rate,storage_fee_calc_mode,subscribe_fee_rate,redemption_fee_rate, subscribe_fee_mode,incentive_raito,incentive_mode,incentive_period,other_contract,remark, ifnull(invest_amount,0),ifnull(share_confirmed,0) FROM fund_essential_info where wind_code_s = :wind_code_s""" with get_db_session() as session: table = session.execute(sql_str, {"wind_code_s": wind_code_s}) content = table.first() if content is None: fund_calc_info = None else: fund_calc_info = { 'wind_code_s': content[0], 'wind_code': content[1], 'sec_name_s': content[2], 'date_start': str_2_date(content[3]), 'date_end': str_2_date(content[4]), 'warning_line': content[5], 'winding_line': content[6], 'closed_period': content[7], 'investment_scope': content[8], 'investment_restrictions': content[9], 'open_frequency': content[10], 'bonus_mode': content[11], 'subscribe_threshold': content[12], 'redemption_threshold': content[13], 'day_count_4_calc_fee': content[14], 'manage_fee_rate': content[15], 'manage_fee_calc_mode': content[16], 'custodian_fee_rate': content[17], 'custodian_fee_calc_mode': content[18], 'admin_fee_rate': content[19], 'admin_fee_calc_mode': content[20], 'storage_fee_rate': content[21], 'storage_fee_calc_mode': content[22], 'subscribe_fee_rate': content[23], 'redemption_fee_rate': content[24], 'subscribe_fee_mode': content[25], 'incentive_raito': content[26], 'incentive_mode': content[27], 'incentive_period': content[28], 'other_contract': content[29], 'remark': content[30], 'invest_amount': content[31], 'share_confirmed': content[32], } return fund_calc_info
def get_fund_nav_acc(wind_code): """ 获取制定基金净值数据,日期转换到每周周五,每周之多一条数据 :param wind_code: :return: """ with get_db_session() as session: table = session.execute(SQL_FUND_NAV_FRIDAY, { 'wind_code2': wind_code, 'wind_code1': wind_code }) date_nav_dic = dict(table.fetchall()) nav_series = pd.Series(date_nav_dic, name=wind_code) return nav_series
def filter_wind_code(fund_nav_df, strategy_type_en): """债券策略指数中存在很多不符合标准的基金,因此需要以 strategy_index_info 中保持的列表为基准""" query_str = "select wind_code_str from strategy_index_info where strategy_name = :strategy_type" with get_db_session() as session: row_data = session.execute(query_str, { 'strategy_type': strategy_type_en }).fetchone() if row_data is not None and len(row_data) > 0: wind_code_str = row_data[0] if wind_code_str is not None and len(wind_code_str) > 0: wind_code_list = wind_code_str.split(',') wind_code_list = list( set(list(fund_nav_df.columns)) & set(wind_code_list)) fund_nav_df = fund_nav_df[wind_code_list] return fund_nav_df
def do_fund_multi_factor(): sql_str = """select distinct wind_code from fof_fund_pct ffp, ( select wind_code_p, max(date_adj) date_latest from fof_fund_pct group by wind_code_p ) ff_date_latest, fund_essential_info ffm where ffp.wind_code_p = ff_date_latest.wind_code_p and ffp.wind_code_s = ffm.wind_code_s and ffp.date_adj = ff_date_latest.date_latest""" with get_db_session() as session: table = session.execute(sql_str) wind_code_list = [wind_code[0] for wind_code in table.fetchall()] do_fund_multi_factor_by_wind_code_list(wind_code_list)
def fill_fund_info_strategy_data(wind_code_list, strategy_type, rank=4): update_wind_fund_info_by_code_list(wind_code_list, strategy_type) sql_str_wind_code_in = "'" + "', '".join(wind_code_list) + "'" with get_db_session() as session: sql_str = "update fund_info set rank=:rank where wind_code in (%s)" % sql_str_wind_code_in session.execute(sql_str, params={'rank': rank}) sql_str = "select wind_code, fund_setupdate from fund_info where wind_code in (%s)" % sql_str_wind_code_in param_list = [{ 'wind_code': content[0], 'stg_code': strategy_type, 'trade_date': content[1], 'stg_pct': 100 } for content in session.execute(sql_str).fetchall()] sql_str = 'insert into fund_stg_pct(wind_code, stg_code, trade_date, stg_pct) values (:wind_code, :stg_code, :trade_date, :stg_pct)' session.execute(sql_str, params=param_list)
def import_wind_stock_info(refresh=False): # 获取全市场股票代码及名称 if refresh: date_fetch = datetime.strptime('2005-1-1', STR_FORMAT_DATE).date() else: date_fetch = date.today() date_end = date.today() stock_code_set = set() while date_fetch < date_end: stock_code_set_sub = get_stock_code_set(date_fetch) if stock_code_set_sub is not None: stock_code_set |= stock_code_set_sub date_fetch += timedelta(days=365) stock_code_set_sub = get_stock_code_set(date_fetch) if stock_code_set_sub is not None: stock_code_set |= stock_code_set_sub # 获取股票对应上市日期,及摘牌日期 # w.wss("300005.SZ,300372.SZ,000003.SZ", "ipo_date,trade_code,mkt,exch_city,exch_eng") stock_code_list = list(stock_code_set) stock_code_count = len(stock_code_list) seg_count = 1000 loop_count = math.ceil(float(stock_code_count) / seg_count) stock_info_df_list = [] for n in range(loop_count): num_start = n * seg_count num_end = (n + 1) * seg_count num_end = num_end if num_end <= stock_code_count else stock_code_count stock_code_list_sub = stock_code_list[num_start:num_end] # 尝试将 stock_code_list_sub 直接传递给wss,是否可行 stock_info_df = w.wss(stock_code_list_sub, "sec_name,trade_code,ipo_date,delist_date,mkt,exch_city,exch_eng,prename") stock_info_df_list.append(stock_info_df) stock_info_all_df = pd.concat(stock_info_df_list) stock_info_all_df.index.rename('WIND_CODE', inplace=True) logging.info('%s stock data will be import', stock_info_all_df.shape[0]) engine = get_db_engine() stock_info_all_df.reset_index(inplace=True) data_list = list(stock_info_all_df.T.to_dict().values()) sql_str = "REPLACE INTO wind_stock_info (wind_code, trade_code, sec_name, ipo_date, delist_date, mkt, exch_city, exch_eng, prename) values (:WIND_CODE, :TRADE_CODE, :SEC_NAME, :IPO_DATE, :DELIST_DATE, :MKT, :EXCH_CITY, :EXCH_ENG, :PRENAME)" # sql_str = "insert INTO wind_stock_info (wind_code, trade_code, sec_name, ipo_date, delist_date, mkt, exch_city, exch_eng, prename) values (:WIND_CODE, :TRADE_CODE, :SEC_NAME, :IPO_DATE, :DELIST_DATE, :MKT, :EXCH_CITY, :EXCH_ENG, :PRENAME)" with get_db_session(engine) as session: session.execute(sql_str, data_list) stock_count = session.execute('select count(*) from wind_stock_info').first()[0] logging.info("%d stocks have been in wind_stock_info", stock_count)
def import_wind_fund_nav_to_fund_nav(): sql_str = """insert fund_nav(wind_code, nav_date, nav, nav_acc, source_mark) select wfn.wind_code, wfn.nav_date, wfn.nav, wfn.nav_acc, 0 source_mark from ( select wind_code, nav_date, nav, nav_acc, 0 from wind_fund_nav group by wind_code, nav_date ) as wfn left outer join fund_nav fn on wfn.wind_code = fn.wind_code and wfn.nav_date = fn.nav_date where fn.nav is null""" with get_db_session() as session: session.execute(sql_str) logging.info('wind_fund_nav has been imported to fund_nav table')
def clean_fund_nav(date_str): """ wind数据库中存在部分数据净值记录前后不一致的问题 比如:某日记录净值 104,次一后期净值变为 1.04 导致净值收益率走势出现偏差 此脚本主要目的在于对这种偏差进行修正 :param date_str: :return: """ sql_str = """select fn_before.wind_code, fn_before.nav_date nav_date_before, fn_after.nav_date nav_date_after, fn_before.nav_acc nav_acc_before, fn_after.nav_acc nav_acc_after, fn_after.nav_acc / fn_before.nav_acc nav_acc_pct from fund_nav fn_before, fund_nav fn_after, ( select wind_code, max(if(nav_date<%s, nav_date, null)) nav_date_before, min(if(nav_date>=%s, nav_date, null)) nav_date_after from fund_nav group by wind_code having nav_date_before is not null and nav_date_after is not null ) fn_date where fn_before.nav_date = fn_date.nav_date_before and fn_before.wind_code = fn_date.wind_code and fn_after.nav_date = fn_date.nav_date_after and fn_after.wind_code = fn_date.wind_code and fn_after.nav_acc / fn_before.nav_acc < 0.5 """ engine = get_db_engine() data_df = pd.read_sql(sql_str, engine, params=[date_str, date_str]) data_count = data_df.shape[0] if data_count == 0: logger.info('no data for clean on %s', date_str) return logger.info('\n%s', data_df) data_list = data_df.to_dict(orient='records') with get_db_session(engine) as session: for content in data_list: wind_code = content['wind_code'] nav_date_before = content['nav_date_before'] logger.info('update wind_code=%s nav_date<=%s', wind_code, nav_date_before) sql_str = "update fund_nav set nav = nav/100, nav_acc = nav_acc/100 where wind_code = :wind_code and nav_date <= :nav_date" session.execute(sql_str, params={ 'wind_code': wind_code, 'nav_date': nav_date_before })
def import_wind_index_daily(): """导入指数数据""" engine = get_db_engine() yesterday = date.today() - timedelta(days=1) query = pd.read_sql_query( 'select wind_code,index_name, max(trade_date) as latest_date from wind_index_daily group by wind_code', engine) query.set_index('wind_code', inplace=True) with get_db_session(engine) as session: # 获取市场有效交易日数据 sql_str = "select trade_date from wind_trade_date where trade_date > '2005-1-1'" table = session.execute(sql_str) trade_date_sorted_list = [t[0] for t in table.fetchall()] trade_date_sorted_list.sort() date_to = get_last(trade_date_sorted_list, lambda x: x <= yesterday) logger.info('%d indexes will been import', query.shape[0]) for code in query.index: date_from = (query.loc[code, 'latest_date'] + timedelta(days=1)) date_from = get_first(trade_date_sorted_list, lambda x: x >= date_from) if date_from is None or date_to is None or date_from > date_to: continue index_name = query.loc[code, 'index_name'] temp = rest.wsd(code, "open,high,low,close,volume,amt,turn,free_turn", date_from, date_to) temp.reset_index(inplace=True) temp.rename(columns={'index': 'trade_date'}, inplace=True) temp.trade_date = pd.to_datetime(temp.trade_date) temp.trade_date = temp.trade_date.map(lambda x: x.date()) temp['wind_code'] = code temp['index_name'] = index_name temp.set_index(['wind_code', 'trade_date'], inplace=True) temp.to_sql('wind_index_daily', engine, if_exists='append', index_label=['wind_code', 'trade_date'], dtype={ 'wind_code': String(20), 'trade_date': Date, }) logger.info('Success update %s - %s' % (code, index_name))
def calc_wind_code_weighted_index(wind_code_dic, date_since, file_name=None): """ 计算 wind_code_list 组成的指数 :param wind_code_list: :param date_since: :param file_name: 默认为None,不生成文件 :return: 合成后的指数每日收益率列表 """ # 获取样本子基金行情数据 wind_code_list_str = ', '.join( ["'" + wind_code + "'" for wind_code in wind_code_dic]) query_base_str = r'''select fv.wind_code, nav_date_week, fv.nav_acc from ( select wind_code, adddate(nav_date, 4 - weekday(nav_date)) as nav_date_week, max(nav_date) as nav_date_max from fund_nav where wind_code in (%s) group by wind_code, nav_date_week ) as ffv, fund_nav fv where ffv.nav_date_week >= '%s' and fv.wind_code = ffv.wind_code and fv.nav_date = ffv.nav_date_max group by wind_code, nav_date_week order by nav_date_week desc''' query_str = query_base_str % (wind_code_list_str, date_since) # print(query_str) engine = get_db_engine() fund_nav_df = pd.read_sql_query(query_str, engine) # 获取样本子基金名称 sql_str = """select wind_code, sec_name from fund_info where wind_code in (%s)""" query_str = sql_str % wind_code_list_str with get_db_session(engine) as session: table = session.execute(query_str) fund_code_name_dic = dict(table.fetchall()) # print(df_fund_nav) df_fund = fund_nav_df.pivot(index='nav_date_week', columns='wind_code', values='nav_acc') df_fund.rename(columns=fund_code_name_dic, inplace=True) weight_dic = { fund_name: wind_code_dic[wind_code] for wind_code, fund_name in fund_code_name_dic.items() } # df_fund.to_csv('%s-%s【%d】 %s_%s.csv' % (strategy_name, sample_name, len(wind_code_list), date_from, date_to)) df_fund.interpolate(inplace=True) df_fund.dropna(inplace=True) fund_name_list = list(df_fund.columns) wind_code_count = len(fund_name_list) if wind_code_count == 0: print('wind_code_list_str has no data') # df_fund.to_csv('%s_df_fund.csv' % sample_name) weight = 1 / wind_code_count # print(df_fund) fund_pct_df = df_fund.pct_change().fillna(0) if file_name is not None: file_path = get_cache_file_path(file_name) fund_index_df = (1 + fund_pct_df).cumprod() fund_index_df.to_csv(file_path) # fund_pct_df *= weight # print(df_fund_pct) nav_index_pct_s = None for wind_code in fund_name_list: if nav_index_pct_s is None: nav_index_pct_s = fund_pct_df[wind_code] * weight_dic[wind_code] else: nav_index_pct_s += fund_pct_df[wind_code] * weight_dic[wind_code] # print("df_nav_index_pct_s[%s]:\n" % wind_code, df_nav_index_pct_s) date_list = list(fund_pct_df.index) if len(date_list) == 0: file_path = get_cache_file_path('df_fund_%s_%s.csv' % (file_name, date_since)) print('子基金净值日期交集为空, 参见 %s文件查看具体数据' % file_path) df_fund.to_csv(file_path) print('between: %s ~ %s' % (min(date_list), max(date_list))) return nav_index_pct_s
def update_fof_stg_pct(wind_code_p): """ 根据子基金投资额及子基金策略比例调整fof基金总体策略比例 :param wind_code_p: fof基金代码 :return: """ # 获取子基金投资额 # sql_str = "select wind_code_s, date_adj, invest_scale from fof_fund_pct where wind_code_p = %s" sql_str = """select ffm.wind_code, date_adj, sum(invest_scale) invest_scale from fof_fund_pct ffp, fund_essential_info ffm where ffp.wind_code_p = %s and ffp.wind_code_s = ffm.wind_code_s GROUP BY ffm.wind_code, date_adj""" engine = get_db_engine() data_df = pd.read_sql(sql_str, engine, params=[wind_code_p]) if data_df.shape[0] == 0: logger.warning('%s 没有找到子基金策略信息') return fof_fund_df = data_df.pivot(columns='wind_code', index='date_adj', values='invest_scale') fof_fund_pct_df = fof_fund_df.fillna(0) / fof_fund_df.sum( axis=1).values.repeat(fof_fund_df.shape[1]).reshape(fof_fund_df.shape) # 获取子基金策略比例 sql_str = """select DISTINCT ffm.wind_code, stg_code, trade_date, stg_pct from fund_stg_pct sp, fund_essential_info ffm, (select wind_code_s from fof_fund_pct where wind_code_p = %s group by wind_code_s) fp where sp.wind_code = ffm.wind_code and ffm.wind_code_s = fp.wind_code_s;""" data_df = pd.read_sql(sql_str, engine, params=[wind_code_p]) fund_stg_df = data_df.set_index(['wind_code', 'stg_code', 'trade_date']).unstack().T.copy() # fund_stg_df.set_index(fund_stg_df.index.levels[1], inplace=True) # fund_stg_df.index = [dt.date() for dt in fund_stg_df.index] fund_stg_df.index = [dt.date() for dt in fund_stg_df.index.levels[1]] add_df = pd.DataFrame( columns=fund_stg_df.columns, index=list(set(fof_fund_pct_df.index) - set(fund_stg_df.index))) fund_stg_df = fund_stg_df.append(add_df).sort_index() for code in fund_stg_df.columns.levels[0]: is_fill_rows = fund_stg_df[code].sum(axis=1) != 0 for stg_code in fund_stg_df[code].columns: is_fill_rows_col = np.isnan(fund_stg_df[code][stg_code]) fill_df = fund_stg_df.loc[is_fill_rows & is_fill_rows_col, code] fill_df[stg_code] = 0 fund_stg_df.loc[is_fill_rows & is_fill_rows_col, code] = fill_df.values fund_stg_df.ffill(inplace=True) fund_stg_df.fillna(0, inplace=True) # 按日循环、调整各基金、各策略比例 adj_date_last = None fund_stg_df_index = fund_stg_df.index wind_code_list = list(fund_stg_df.columns.levels[0]) fof_fund_df_index = fof_fund_pct_df.index for trade_date in fund_stg_df_index: if trade_date in fof_fund_df_index: adj_date_last = trade_date if adj_date_last is None: fund_stg_df.loc[trade_date, :] = 0 continue for code in wind_code_list: fund_stg_df.loc[trade_date, code] = ( fund_stg_df.loc[trade_date, code] * fof_fund_pct_df[code][adj_date_last]).values fund_stg_pct_df = fund_stg_df.sum(level=1, axis=1) fund_stg_pct_df = fund_stg_pct_df.drop( fund_stg_pct_df.index[fund_stg_pct_df.sum(axis=1) == 0]) fund_stg_pct_df = fund_stg_pct_df.unstack().reset_index() fund_stg_pct_df['wind_code'] = wind_code_p fund_stg_pct_df.rename(columns={ 'level_1': 'trade_date', 0: 'stg_pct' }, inplace=True) # fund_stg_pct_df.set_index('wind_code', inplace=True) # 清除原有记录 logger.info('调整基金策略比例信息:') for trade_date, df in fund_stg_pct_df.groupby('trade_date'): logger.info('交易日:%s 策略比例如下:\n%s', trade_date, df) with get_db_session(engine) as session: session.execute( 'delete from fund_stg_pct where wind_code = :wind_code', params={'wind_code': wind_code_p}) # 插入最新fof基金策略比例 fund_stg_pct_df.to_sql('fund_stg_pct', engine, if_exists='append', index=False)
def update_wind_fund_nav(get_df=False, wind_code_list=None): table_name = 'wind_fund_nav' rest = WindRest(WIND_REST_URL) # 初始化数据下载端口 # 初始化数据库engine engine = get_db_engine() # 链接数据库,并获取fundnav旧表 with get_db_session(engine) as session: table = session.execute( 'select wind_code, max(trade_date) from wind_fund_nav group by wind_code' ) fund_trade_date_latest_in_nav_dic = dict(table.fetchall()) wind_code_set_existed_in_nav = set( fund_trade_date_latest_in_nav_dic.keys()) # 获取wind_fund_info表信息 fund_info_df = pd.read_sql_query( """SELECT DISTINCT wind_code as wind_code,fund_setupdate,fund_maturitydate,trade_date_latest from fund_info group by wind_code""", engine) trade_date_latest_dic = { wind_code: trade_date_latest for wind_code, trade_date_latest in zip(fund_info_df['wind_code'], fund_info_df['fund_setupdate']) } fund_info_df.set_index('wind_code', inplace=True) if wind_code_list is None: wind_code_list = list(fund_info_df.index) else: wind_code_list = list(set(wind_code_list) & set(fund_info_df.index)) date_end = date.today() - timedelta(days=1) date_end_str = date_end.strftime(STR_FORMAT_DATE) fund_nav_all_df = [] no_data_count = 0 code_count = len(wind_code_list) # 对每个新获取的基金名称进行判断,若存在 fundnav 中,则只获取部分净值 wind_code_trade_date_latest = {} try: for i, wind_code in enumerate(wind_code_list): # 设定数据获取的起始日期 wind_code_trade_date_latest[wind_code] = date_end if wind_code in wind_code_set_existed_in_nav: trade_latest = fund_trade_date_latest_in_nav_dic[wind_code] if trade_latest >= date_end: continue date_begin = trade_latest + timedelta(days=1) else: date_begin = trade_date_latest_dic[wind_code] if date_begin is None: continue elif isinstance(date_begin, str): date_begin = datetime.strptime(date_begin, STR_FORMAT_DATE).date() if isinstance(date_begin, date): if date_begin.year < 1900: continue if date_begin > date_end: continue date_begin = date_begin.strftime('%Y-%m-%d') else: continue # 尝试获取 fund_nav 数据 for k in range(2): try: fund_nav_tmp_df = rest.wsd(codes=wind_code, fields='nav,NAV_acc,NAV_date', begin_time=date_begin, end_time=date_end_str, options='Fill=Previous') break except Exception as exp: logger.error("%s Failed, ErrorMsg: %s" % (wind_code, str(exp))) continue else: del wind_code_trade_date_latest[wind_code] fund_nav_tmp_df = None if fund_nav_tmp_df is None: logger.info('%s No data', wind_code) del wind_code_trade_date_latest[wind_code] no_data_count += 1 logger.warning('%d funds no data', no_data_count) else: fund_nav_tmp_df.dropna(how='all', inplace=True) df_len = fund_nav_tmp_df.shape[0] if df_len == 0: continue fund_nav_tmp_df['wind_code'] = wind_code # 此处删除 trade_date_latest 之后再加上,主要是为了避免因抛出异常而导致的该条数据也被记录更新 del wind_code_trade_date_latest[wind_code] trade_date_latest = fund_nav_df_2_sql(table_name, fund_nav_tmp_df, engine, is_append=True) if trade_date_latest is None: logger.error('%s[%d] data insert failed', wind_code) else: wind_code_trade_date_latest[wind_code] = trade_date_latest logger.info('%d) %s updated, %d funds left', i, wind_code, code_count - i) if get_df: fund_nav_all_df = fund_nav_all_df.append( fund_nav_tmp_df) finally: import_wind_fund_nav_to_fund_nav() update_trade_date_latest(wind_code_trade_date_latest) try: update_fund_mgrcomp_info() except: # 新功能上线前由于数据库表不存在,可能导致更新失败,属于正常现象 logger.exception('新功能上线前由于数据库表不存在,可能导致更新失败,属于正常现象') return fund_nav_all_df
def get_fund_nav_with_index(wind_code, date_from_str, date_to_str, quantile_list=[0.5], normalized=True, use_alias=False): """ 获取基金净值与对应策略指数的对比走势 :param wind_code: :param date_from_str: :param date_to_str: :param quantile_list: :param normalized: :return: """ engine = get_db_engine() # 获取策略信息 with get_db_session(engine) as session: table = session.execute( 'select sec_name, alias, strategy_type from fund_info where wind_code = :wind_code', params={'wind_code': wind_code}) content = table.first() if content is None: logger.error('没有找到%s的策略类型', wind_code) return None sec_name, alias, strategy_type_cn = content # logger.debug(strategy_type_cn) if strategy_type_cn not in STRATEGY_TYPE_CN_EN_DIC: logger.error("基金%s 策略%s 没有对应的英文") strategy_type_en = STRATEGY_TYPE_CN_EN_DIC[strategy_type_cn] # 获取周级别净值信息 sql_str = """select nav_date_week, fv.nav_acc from ( select wind_code, adddate(nav_date, 4 - weekday(nav_date)) as nav_date_week, max(nav_date) as nav_date_max from fund_nav where wind_code = %s group by wind_code, nav_date_week having nav_date_week between %s and %s ) as ffv, fund_nav fv where fv.wind_code = %s and fv.wind_code = ffv.wind_code and fv.nav_date = ffv.nav_date_max group by fv.wind_code, nav_date_week order by nav_date_week""" fund_nav_df = pd.read_sql( sql_str, engine, params=[wind_code, date_from_str, date_to_str, wind_code]) column_name = alias if use_alias else wind_code + sec_name fund_nav_df.rename(columns={'nav_acc': column_name}, inplace=True) fund_nav_df.set_index('nav_date_week', inplace=True) # logger.debug('fund_nav_df.shape:%s', fund_nav_df.shape) # logger.debug('fund_nav_df:\n%s', fund_nav_df) # 获取周级别策略指数信息 stg_index_s, stg_statistic_dic = get_strategy_index_by_name( strategy_type_en, date_from_str, date_to_str, create_csv=False) # logger.debug("\n%s", stg_index_s) fund_nav_df[strategy_type_cn] = stg_index_s if normalized: fund_nav_df = (1 + fund_nav_df.pct_change().fillna(0)).cumprod() return fund_nav_df
def import_wind_future_info(): # 获取已存在合约列表 sql_str = 'select wind_code, ipo_date from wind_future_info' engine = get_db_engine() with get_db_session(engine) as session: table = session.execute(sql_str) wind_code_ipo_date_dic = dict(table.fetchall()) # 通过wind获取合约列表 # w.start() rest = WindRest(WIND_REST_URL) # 初始化服务器接口,用于下载万得数据 future_sectorid_dic_list = [ { 'subject_name': 'CFE 沪深300', 'regex': r"IF\d{4}\.CFE", 'sectorid': 'a599010102000000', 'date_establish': '2010-4-16' }, { 'subject_name': 'CFE 上证50', 'regex': r"IH\d{4}\.CFE", 'sectorid': '1000014871000000', 'date_establish': '2015-4-16' }, { 'subject_name': 'CFE 中证500', 'regex': r"IC\d{4}\.CFE", 'sectorid': '1000014872000000', 'date_establish': '2015-4-16' }, { 'subject_name': 'SHFE 黄金', 'regex': r"AU\d{4}\.SHF", 'sectorid': 'a599010205000000', 'date_establish': '2008-01-09' }, { 'subject_name': 'SHFE 沪银', 'regex': r"AG\d{4}\.SHF", 'sectorid': '1000006502000000', 'date_establish': '2012-05-10' }, { 'subject_name': 'SHFE 螺纹钢', 'regex': r"RB\d{4}\.SHF", 'sectorid': 'a599010206000000', 'date_establish': '2009-03-27' }, { 'subject_name': 'SHFE 热卷', 'regex': r"HC\d{4}\.SHF", 'sectorid': '1000011455000000', 'date_establish': '2014-03-21' }, { 'subject_name': 'DCE 焦炭', 'regex': r"J\d{4}\.SHF", 'sectorid': '1000002976000000', 'date_establish': '2011-04-15' }, { 'subject_name': 'DCE 焦煤', 'regex': r"JM\d{4}\.SHF", 'sectorid': '1000009338000000', 'date_establish': '2013-03-22' }, { 'subject_name': '铁矿石', 'regex': r"I\d{4}\.SHF", 'sectorid': '1000006502000000', 'date_establish': '2013-10-18' }, ] wind_code_set = set() ndays_per_update = 60 # 获取历史期货合约列表信息 for future_sectorid_dic in future_sectorid_dic_list: subject_name = future_sectorid_dic['subject_name'] sector_id = future_sectorid_dic['sectorid'] regex_str = future_sectorid_dic['regex'] date_establish = datetime.strptime( future_sectorid_dic['date_establish'], STR_FORMAT_DATE).date() date_since = get_date_since(wind_code_ipo_date_dic, regex_str, date_establish) date_yestoday = date.today() - timedelta(days=1) while date_since <= date_yestoday: date_since_str = date_since.strftime(STR_FORMAT_DATE) # w.wset("sectorconstituent","date=2017-05-02;sectorid=a599010205000000") # future_info_df = wset_cache(w, "sectorconstituent", "date=%s;sectorid=%s" % (date_since_str, sector_id)) future_info_df = rest.wset( "sectorconstituent", "date=%s;sectorid=%s" % (date_since_str, sector_id)) wind_code_set |= set(future_info_df['wind_code']) # future_info_df = future_info_df[['wind_code', 'sec_name']] # future_info_dic_list = future_info_df.to_dict(orient='records') # for future_info_dic in future_info_dic_list: # wind_code = future_info_dic['wind_code'] # if wind_code not in wind_code_future_info_dic: # wind_code_future_info_dic[wind_code] = future_info_dic if date_since >= date_yestoday: break else: date_since += timedelta(days=ndays_per_update) if date_since > date_yestoday: date_since = date_yestoday # 获取合约列表 wind_code_list = [ wc for wc in wind_code_set if wc not in wind_code_ipo_date_dic ] # 获取合约基本信息 # w.wss("AU1706.SHF,AG1612.SHF,AU0806.SHF", "ipo_date,sec_name,sec_englishname,exch_eng,lasttrade_date,lastdelivery_date,dlmonth,lprice,sccode,margin,punit,changelt,mfprice,contractmultiplier,ftmargins,trade_code") # future_info_df = wss_cache(w, wind_code_list, # "ipo_date,sec_name,sec_englishname,exch_eng,lasttrade_date,lastdelivery_date,dlmonth,lprice,sccode,margin,punit,changelt,mfprice,contractmultiplier,ftmargins,trade_code") if len(wind_code_list) > 0: future_info_df = rest.wss( wind_code_list, "ipo_date,sec_name,sec_englishname,exch_eng,lasttrade_date,lastdelivery_date,dlmonth,lprice,sccode,margin,punit,changelt,mfprice,contractmultiplier,ftmargins,trade_code" ) future_info_df['MFPRICE'] = future_info_df['MFPRICE'].apply( mfprice_2_num) future_info_count = future_info_df.shape[0] future_info_df.rename( columns={c: str.lower(c) for c in future_info_df.columns}, inplace=True) future_info_df.index.rename('wind_code', inplace=True) future_info_df.to_sql('wind_future_info', engine, if_exists='append', dtype={ 'wind_code': String(20), 'trade_code': String(20), 'sec_name': String(50), 'sec_englishname': String(50), 'exch_eng': String(50), 'ipo_date': Date, 'lasttrade_date': Date, 'lastdelivery_date': Date, 'dlmonth': String(20), 'lprice': Float, 'sccode': String(20), 'margin': Float, 'punit': String(20), 'changelt': Float, 'mfprice': Float, 'contractmultiplier': Float, 'ftmargins': String(100), }) print('%d data import' % future_info_count)
def update_wind_fund_info(get_df=False, mode='replace_insert'): table_name = 'wind_fund_info' rest = WindRest(WIND_REST_URL) # 初始化服务器接口,用于下载万得数据 engine = get_db_engine() # 初始化数据库,并获取旧表信息 old = pd.read_sql_query('select wind_code from %s' % table_name, engine) old_set = set(old['wind_code']) # 从万得获取最新基金列表 types = {u'股票多头策略': 1000023122000000, u'股票多空策略': 1000023123000000, u'其他股票策略': 1000023124000000, u'阿尔法策略': 1000023125000000, u'其他市场中性策略': 1000023126000000, u'事件驱动策略': 1000023113000000, u'债券策略': 1000023114000000, u'套利策略': 1000023115000000, u'宏观策略': 1000023116000000, u'管理期货策略': 1000023117000000, u'组合基金策略': 1000023118000000, u'货币市场策略': 1000023119000000, u'多策略': 100002312000000, u'其他策略': 1000023121000000} df = pd.DataFrame() yestday = (date.today() - timedelta(days=1)).strftime('%Y-%m-%d') for i in types.keys(): temp = rest.wset("sectorconstituent", "date=%s;sectorid=%s" % (yestday, str(types[i]))) temp['strategy_type'] = i logging.info('%s sectorconstituent %s df.shape:%s', yestday, i, temp.shape) df = pd.concat([df, temp], axis=0) fund_types_df = df[['wind_code', 'sec_name', 'strategy_type']] new_set = set(fund_types_df['wind_code']) target_set = new_set.difference(old_set) # in new_set but not old_set fund_types_df.set_index('wind_code', inplace=True) # 获取新成立基金各项基本面信息 code_list = list(target_set) code_count = len(code_list) seg_count = 2500 # info_df = None info_df = pd.DataFrame() for n in range(math.ceil(float(code_count) / seg_count)): num_start = n * seg_count num_end = (n + 1) * seg_count num_end = num_end if num_end <= code_count else code_count if num_start <= code_count: codes = ','.join(code_list[num_start:num_end]) # 分段获取基金成立日期数据 info2_df = rest.wss(codes, "fund_setupdate,fund_maturitydate,fund_mgrcomp,fund_existingyear,fund_ptmyear,fund_type,fund_fundmanager") logging.info('%05d ) [%d %d]' % (n, num_start, num_end)) info_df = info_df.append(info2_df) else: break # 没有新增基金,直接退出 if info_df.shape[0] == 0: return # 整理数据插入 wind_fund_info 表 # info_df['FUND_SETUPDATE'] = pd.to_datetime(info_df['FUND_SETUPDATE']).apply(lambda x: x.date()) info_df['FUND_SETUPDATE'] = info_df['FUND_SETUPDATE'].apply(str_2_date) # info_df['FUND_MATURITYDATE'] = pd.to_datetime(info_df['FUND_MATURITYDATE']).apply(lambda x: x.date()) info_df['FUND_MATURITYDATE'] = info_df['FUND_MATURITYDATE'].apply(str_2_date) info_df = fund_types_df.join(info_df, how='right') info_df.rename(columns={'FUND_SETUPDATE': 'fund_setupdate', 'FUND_MATURITYDATE': 'fund_maturitydate', 'FUND_MGRCOMP': 'fund_mgrcomp', 'FUND_EXISTINGYEAR': 'fund_existingyear', 'FUND_PTMYEAR': 'fund_ptmyear', 'FUND_TYPE': 'fund_type', 'FUND_FUNDMANAGER': 'fund_fundmanager' }, inplace=True) info_df.index.names = ['wind_code'] info_df.drop_duplicates(inplace=True) if mode == 'append': info_df.to_sql(table_name, engine, if_exists='append', dtype={ 'wind_code': String(200), 'sec_name': String(200), 'strategy_type': String(200), 'fund_setupdate': Date, 'fund_maturitydate': Date }) elif mode == 'replace_insert': info_df.reset_index(inplace=True) data_list = list(info_df.T.to_dict().values()) sql_str = """REPLACE INTO wind_fund_info (wind_code,sec_name,strategy_type,fund_setupdate,fund_maturitydate,fund_mgrcomp,fund_existingyear,fund_ptmyear,fund_type,fund_fundmanager) VALUES (:wind_code,:sec_name,:strategy_type,:fund_setupdate,:fund_maturitydate,:fund_mgrcomp,:fund_existingyear,:fund_ptmyear,:fund_type,:fund_fundmanager); """ with get_db_session() as session: session.execute(sql_str, data_list) else: raise ValueError('mode="%s" is not available' % mode) logging.info('%d funds inserted' % info_df.shape[0]) # wind_fund_info 表中增量数据插入到 fund_info sql_str = """insert into fund_info(wind_code, sec_name, strategy_type, fund_setupdate, fund_maturitydate, fund_mgrcomp, fund_existingyear, fund_ptmyear, fund_type, fund_fundmanager) select wfi.wind_code, wfi.sec_name, wfi.strategy_type, wfi.fund_setupdate, wfi.fund_maturitydate, wfi.fund_mgrcomp, wfi.fund_existingyear, wfi.fund_ptmyear, wfi.fund_type, wfi.fund_fundmanager from wind_fund_info wfi left outer join fund_info fi on wfi.wind_code=fi.wind_code where fi.wind_code is null""" with get_db_session(engine) as session: table = session.execute(sql_str) logging.info('new data was inserted into fund_info from wind_fund_info table') if get_df: return info_df
def import_smfund_info(): w.start() types = { u'主动股票型分级母基金': 1000007766000000, u'被动股票型分级母基金': 1000007767000000, u'纯债券型分级母基金': 1000007768000000, u'混合债券型分级母基金': 1000007769000000, u'混合型分级母基金': 1000026143000000, u'QDII分级母基金': 1000019779000000 } # 获取各个历史时段的分级基金列表,并汇总全部基金代码 today = date.today().strftime('%Y-%m-%d') dates = ['2011-01-01', '2013-01-01', '2015-01-01', '2017-01-01'] # 分三个时间点获取市场上所有分级基金产品 df = pd.DataFrame() for date_p in dates: temp_df = wset_cache(w, "sectorconstituent", "date=%s;sectorid=1000006545000000" % date_p) df = df.append(temp_df) wind_code_all = df['wind_code'].unique() # 查询数据库,剔除已存在的基金代码 with get_db_session() as session: table = session.execute("select wind_code from wind_smfund_info") wind_code_existed = set([content[0] for content in table.fetchall()]) wind_code_new = list(set(wind_code_all) - wind_code_existed) wind_code_new = [code for code in wind_code_new if code.find('!') < 0] if len(wind_code_new) == 0: print('no sm fund fow import') else: info_df = wss_cache(w, wind_code_new, 'fund_setupdate, fund_maturitydate') if info_df is None: raise Exception('no data') info_df['FUND_SETUPDATE'] = info_df['FUND_SETUPDATE'].apply( lambda x: x.date()) info_df['FUND_MATURITYDATE'] = info_df['FUND_MATURITYDATE'].apply( lambda x: x.date()) info_df.rename(columns={ 'FUND_SETUPDATE': 'fund_setupdate', 'FUND_MATURITYDATE': 'fund_maturitydate' }, inplace=True) field = "fund_type,wind_code,sec_name,class_a_code,class_a_name,class_b_code,class_b_name,a_pct,b_pct,upcv_nav,\ downcv_nav,track_indexcode,track_indexname,max_purchasefee,max_redemptionfee" df = pd.DataFrame() for code in info_df.index: beginDate = info_df.loc[code, 'fund_setupdate'].strftime('%Y-%m-%d') temp_df = wset_cache( w, "leveragedfundinfo", "date=%s;windcode=%s;field=%s" % (beginDate, code, field)) df = df.append(temp_df) df.set_index('wind_code', inplace=True) df['tradable'] = df.index.map(lambda x: x if 'S' in x else None) # df.index = df.index.map(lambda x: x[:-2] + 'OF') info_df = info_df.join(df, how='outer') info_df.rename( columns={ 'a_nav': 'nav_a', 'b_nav': 'nav_b', 'a_fs_inc': 'fs_inc_a', 'b_fs_inc': 'fs_inc_b' }) info_df.index.rename('wind_code', inplace=True) engine = get_db_engine() info_df.to_sql('wind_smfund_info', engine, if_exists='append', index_label='wind_code', dtype={ 'wind_code': String(20), 'fund_setupdate': Date, 'fund_maturitydate': Date, 'fund_type': String(20), 'sec_name': String(50), 'class_a_code': String(20), 'class_a_name': String(50), 'class_b_code': String(20), 'class_b_name': String(50), 'track_indexcode': String(20), 'track_indexname': String(50), 'tradable': String(20), }) w.close()
def get_fund_nav_calc_data_last(wind_code, nav_date, fund_calc_info=None, share_default=None, cash_amount_default=None, other_fee_default=None): """ 获取最近一个净值日期的 fund_nav_calc 数据作为参考计算数据 :param wind_code: :param nav_date: :return: fund_nav_calc_data_last_dic 返回dict对象,其中 has_data 为 true 标明没有数据 """ # 获取净值计算需要的基本信息 if fund_calc_info is None: fund_calc_info = get_fund_calc_info(wind_code) # 获取最近一个净值日期的 fund_nav_calc 数据作为参考计算数据 sql_str = """select nav_date, share, market_value, cash_amount, manage_fee, custodian_fee, admin_fee, storage_fee, other_fee, market_value + cash_amount cap_tot, market_value + cash_amount - (manage_fee + custodian_fee + admin_fee + storage_fee + other_fee) cap_net, nav from fund_nav_calc where wind_code = :wind_code and nav_date = (select max(nav_date) from fund_nav_calc where wind_code = :wind_code1 and nav_date < :nav_date)""" with get_db_session() as session: table = session.execute(sql_str, { 'wind_code': wind_code, 'wind_code1': wind_code, "nav_date": nav_date }) content = table.first() # logger.debug("%s", content) if content is None: # 没有历史记录,从0开始计算 logger.warning("fund %s 没有可用的上一净值日期数据,使用默认数据:\nfund_calc_info:%s", wind_code, fund_calc_info) has_data = False nav_date = fund_calc_info["date_start"] share = fund_calc_info["share_confirmed"] if share is None: share = 0 market_value = 0 cash_amount = fund_calc_info["invest_amount"] if cash_amount is None: cash_amount = 0 manage_fee, custodian_fee, admin_fee, storage_fee, other_fee = 0, 0, 0, 0, 0 else: # 存在历史记录,继承上一净值日期数据进行计算 has_data = True nav_date, share, market_value, cash_amount, manage_fee, custodian_fee, \ admin_fee, storage_fee, other_fee, cap_tot, cap_net, nav = content if share_default is not None: share = share_default if other_fee_default is not None: other_fee = other_fee_default if cash_amount_default is not None: cash_amount = cash_amount_default cap_tot = market_value + cash_amount cap_net = cap_tot - (manage_fee + custodian_fee + admin_fee + storage_fee + other_fee) if share == 0: nav = 0 logger.warning( "fund %s 没有可用的份额数据,无法计算净值,可以通过fund_essential_info表添加初始份额,或通过fund_nav_calc表添加上一净值日期的份额", wind_code) else: nav = cap_net / share # 查询记录进行汇总 fund_nav_calc_data_last_dic = { "has_data": has_data, "nav_date": nav_date, "share": share, "market_value": market_value, "cash_amount": cash_amount, "manage_fee": manage_fee, "custodian_fee": custodian_fee, "admin_fee": admin_fee, "storage_fee": storage_fee, "other_fee": other_fee, "cap_tot": cap_tot, "cap_net": cap_net, "nav": nav, } return fund_nav_calc_data_last_dic
def import_fund_sec_pct(wind_code, folder_path, mode='delete_insert'): file_names = os.listdir(folder_path) # file_names = [os.path.join(folder_path, 'SK8992_复华丰润稳健一期_估值表_20170124.xls')] nav_date_list, nav_list, nav_acc_list, nav_tot_list, hold_pos_df_list = [], [], [], [], [] cyb_pct_list = [] for file_name in file_names: # file_path = r'd:\Works\F复华投资\合同、协议\丰润\丰润一期\SK8992_复华丰润稳健一期_估值表_20170113.xls' file_path = os.path.join(folder_path, file_name) file_name_net, file_extension = os.path.splitext(file_path) if file_extension != '.xls': print('ignore:', file_name) continue else: print('handle:', file_name) # 获取净值日期 nav_date = get_fund_nav_date(file_path) # 获取股票持仓 # hold_pos_df, ret_dic = get_holding_df(file_path) sec_data_list, ret_dic = get_sec_data_list(file_path) # 增加日期列 for dic in sec_data_list: dic['nav_date'] = nav_date # hold_pos_df['nav_date'] = nav_date # 增加净值数据 # print(nav_date, nav, nav_acc, hold_pos_df) nav_date_list.append(nav_date) nav_list.append(ret_dic['nav']) nav_acc_list.append(ret_dic['nav_acc']) nav_tot_list.append(ret_dic['nav_tot']) hold_pos_df_list.extend(sec_data_list) # 临时提取创业板指仓位占比 # cyb_pct_list.append(ret_dic.setdefault('cyb_pct', 0)) for dic in hold_pos_df_list: dic['trade_status'] = filter_trade_status_str(dic['trade_status']) # 临时提取创业板指仓位占比 # cyb_pct_df = pd.DataFrame({'nav_date': nav_date_list, # 'cyb_pct': cyb_pct_list, # }) # cyb_pct_df.set_index('nav_date', inplace=True) # cyb_pct_df.to_csv('cyb_pct_df.csv') if mode is None: return engine = get_db_engine() # 更新净值 nav_df = pd.DataFrame({ 'nav_date': nav_date_list, 'nav': nav_list, 'nav_acc': nav_acc_list, 'nav_tot': nav_tot_list }) nav_df['wind_code'] = wind_code nav_df['source_mark'] = SourceMark.IMPORT nav_df.set_index(['nav_date', 'wind_code'], inplace=True) # nav_df.to_csv('fr_nav.csv') if mode == 'delete_insert': sql_str = 'delete from fund_nav where wind_code=:wind_code' with get_db_session(engine) as session: session.execute(sql_str, {"wind_code": wind_code}) print('clean fund_nav data on %s' % wind_code) nav_df.to_sql('fund_nav', engine, if_exists='append') print('%d data was imported into fund_nav on %s' % (nav_df.shape[0], wind_code)) # hold_pos_df_tot = pd.concat(hold_pos_df_list) # hold_pos_df_tot['wind_code'] = wind_code # hold_pos_df_tot.set_index(['wind_code', 'sec_code', 'nav_date', 'direction'], inplace=True) # print(hold_pos_df_tot.shape) # print(hold_pos_df_tot.index) # hold_pos_df_tot.to_csv('asdf.csv') # 删除历史数据 if mode == 'delete_insert': sql_str = 'delete from fund_sec_pct where wind_code=:wind_code' with get_db_session(engine) as session: session.execute(sql_str, {"wind_code": wind_code}) print('clean fund_sec_pct data on %s' % wind_code) # hold_pos_df_tot.to_sql('fund_sec_pct', engine, if_exists='append') # print('%d data imported into fund_sec_pct on %s', hold_pos_df_tot.shape[0], wind_code) # stock_df = get_stock_df(file_path) # print(stock_df) # 导入估值表 for dic in hold_pos_df_list: dic["wind_code"] = wind_code sec_df = pd.DataFrame(hold_pos_df_list).set_index( ['wind_code', 'sec_code', 'nav_date', 'direction']) sec_df.to_sql('fund_sec_pct', engine, if_exists='append') # with get_db_session(engine) as session: # sql_str = """INSERT INTO fund_sec_pct (wind_code, sec_code, nav_date, direction, position, cost_unit, cost_tot, cost_pct, value_tot, value_pct, trade_status, sec_type) # VALUES (:wind_code, :sec_code, :nav_date, :direction, :position, :cost_unit, :cost_tot, :cost_pct, :value_tot, :value_pct, :trade_status, :sec_type)""" # session.execute(sql_str, hold_pos_df_list) print('%d data was imported into fund_sec_pct on %s' % (len(hold_pos_df_list), wind_code))