def __init__(self, regress_length=25, regress_length_min=20,
             stock_ratio_up=0.95, stock_ratio_low=0.60,
             turnover_daily=0.035, index_code_list=None):
    """
    Load index and fund return data and store the regression settings.

    Args:
        regress_length: stored as ``self.regress_length``.
        regress_length_min: stored as ``self.regress_length_min``.
        stock_ratio_up: stored as ``self.stock_ratio_up``.
        stock_ratio_low: stored as ``self.stock_ratio_low``.
        turnover_daily: stored as ``self.turnover_daily``.
        index_code_list: index codes to use. When ``None`` a built-in
            list of six codes (with matching display names) is used.
    """
    Data.__init__(self)

    # Path components are passed separately so the separator is chosen by
    # the platform instead of being a hard-coded backslash.
    self.data_path = os.path.join(self.primary_data_path,
                                  'fund_data', 'fund_stock_style_ratio')

    # Index closes -> daily percentage change expressed in percent.
    index_close = Index().get_index_cross_factor(factor_name='CLOSE')
    index_pct = index_close.pct_change() * 100
    # presumably already a percentage return series -- TODO confirm units
    fund_return = FundFactor().get_fund_factor("Repair_Nav_Pct")
    self.data_return = pd.concat([index_pct, fund_return], axis=1)

    self.regress_length = regress_length
    self.regress_length_min = regress_length_min
    self.stock_ratio_up = stock_ratio_up
    self.stock_ratio_low = stock_ratio_low
    self.turnover_daily = turnover_daily

    if index_code_list is None:
        # A bond index is needed to measure the stock/bond position.
        self.index_code_list = [
            "885062.WI", "801853.SI", "000300.SH",
            "000905.SH", "000852.SH", "399006.SZ",
        ]
        self.index_name_list = [
            '短期纯债基金', '绩优股指数', '沪深300',
            '中证500', '中证1000', '创业板指',
        ]
    else:
        self.index_code_list = index_code_list
        # No display names are supplied for custom codes; fall back to the
        # codes themselves so the attribute always exists.
        self.index_name_list = list(index_code_list)
def cal_index_excess_pct(self):
    """
    Compute the index return in excess of the daily risk-free rate and
    save it, together with its running sum, as a factor-return CSV.
    """
    factor_label = "Market"

    # Index closes (last row dropped) -> daily percentage change in percent.
    close = Index().get_index_factor("000985.CSI", attr=['CLOSE'])
    daily_change = close.iloc[:-1, :].pct_change()
    daily_change.columns = ['StockIndexReturn']
    index_return = daily_change['StockIndexReturn'] * 100

    risk_free = Macro().get_daily_risk_free_rate()['RiskFreeRate']

    # Align on the index, subtract, and keep only dates present in both.
    excess = index_return.sub(risk_free, axis='index').dropna()
    excess_frame = pd.DataFrame(excess)
    excess_frame.columns = [factor_label]
    excess_frame['CumSumReturn'] = excess_frame[factor_label].cumsum()

    target = os.path.join(self.data_path, 'factor_return',
                          'FactorReturn_%s.csv' % factor_label)
    excess_frame.to_csv(target)
def filter_fund_pool(self, index_code, index_name, end_date, track_error_up):
    """
    Build the pool of funds tracking one index and write it to Excel.

    Funds are kept when, over the 250 trading days before ``end_date``,
    they have more than ``self.data_min_length`` excess-return
    observations and a tracking error below ``track_error_up``. They must
    also mention ``index_name`` in their benchmark, be set up before the
    window starts, not be an ETF or feeder fund (by full name), and be
    flagged as an A-class fund by ``Fund().if_a_fund``.

    Args:
        index_code: index code, e.g. '000300.SH'.
        index_name: index name searched for in the benchmark text,
            e.g. '沪深300'; also used in the output file name.
        end_date: last date of the window, e.g. "20181231".
        track_error_up: exclusive upper bound on tracking error, e.g. 0.03.
    """
    beg_date = Date().get_trade_date_offset(end_date, -250)

    # Load data.
    fund_nav = Fund().get_fund_factor("Repair_Nav")
    index_close = Index().get_index_factor(index_code, attr=['CLOSE'])
    index_close.columns = [index_code]
    result = pd.DataFrame([], index=fund_nav.columns,
                          columns=['跟踪误差', '数据长度'])

    # Tracking error over the window. reindex() yields NaN rows for index
    # dates missing from the NAV table; .loc with a label list raises
    # KeyError for them in current pandas.
    fund_nav = fund_nav.reindex(index_close.index)
    fund_pct = fund_nav.pct_change()
    index_pct = index_close.pct_change()[index_code]
    fund_excess_pct = fund_pct.sub(index_pct, axis='index')
    fund_excess_pct_period = fund_excess_pct.loc[beg_date:end_date, :]
    result.loc[:, "数据长度"] = fund_excess_pct_period.count()
    # Daily standard deviation scaled by sqrt(250).
    result.loc[:, "跟踪误差"] = fund_excess_pct_period.std() * np.sqrt(250)

    # Filter on data length and tracking error.
    result = result.dropna()
    result = result[result['数据长度'] > self.data_min_length]
    result = result[result['跟踪误差'] < track_error_up]

    # Attach basic fund information.
    data_pd = Fund().get_wind_fund_info()
    data_pd = data_pd[[
        'BenchMark', 'Name', 'FullName', 'SetupDate', 'InvestType'
    ]]
    data_pd.columns = ['基金基准', '基金简称', '基金全称', '上市日期', '基金类型']
    data = pd.concat([data_pd, result], axis=1)
    data = data.dropna()
    data = data[data["基金基准"].map(lambda x: index_name in x)]
    data = data[data["上市日期"] < beg_date]
    data = data[data["基金全称"].map(lambda x: "交易型开放式指数" not in x)]
    data = data[data["基金全称"].map(lambda x: "联接" not in x)]
    data['A类基金'] = data['基金简称'].map(Fund().if_a_fund)
    data = data[data['A类基金'] == 'A类基金']

    # Write the result.
    out_path = os.path.join(self.data_path, "filter_fund_pool")
    file_name = os.path.join(out_path, '基金最终筛选池_' + index_name + '.xlsx')
    sheet_name = "基金筛选池"

    # One-row frame mapping each output column to an Excel number format.
    # `.ix` was removed from pandas; these are label lookups, so `.loc`.
    num_format_pd = pd.DataFrame([], columns=data.columns, index=['format'])
    num_format_pd.loc['format', :] = '0.00'
    num_format_pd.loc['format', '跟踪误差'] = '0.00%'
    num_format_pd.loc['format', '数据长度'] = '0'

    excel = WriteExcel(file_name)
    worksheet = excel.add_worksheet(sheet_name)
    excel.write_pandas(data, worksheet, begin_row_number=0,
                       begin_col_number=1, num_format_pd=num_format_pd,
                       color="red", fillna=True)
    # NOTE(review): the workbook is never explicitly closed here -- confirm
    # that WriteExcel flushes the file on its own.
def calculate_fund_factor(self, index_code, index_name, end_date):
    """
    Compute tracking error, excess return and information ratio of the
    funds in the filtered pool over a recent window, and write them to
    Excel.

    The window runs from ``self.data_length`` trading days before
    ``end_date`` up to ``end_date``.

    Args:
        index_code: index code, e.g. '000905.SH'.
        index_name: index name used in the input/output file names,
            e.g. '中证500'.
        end_date: last date of the window, e.g. '20151231'.
    """
    beg_date = Date().get_trade_date_offset(end_date, -self.data_length)

    # Load the fund pool, fund NAVs and index closes.
    file = os.path.join(self.data_path, 'filter_fund_pool',
                        '基金最终筛选池_' + index_name + '.xlsx')
    # read_excel has no `encoding` parameter; passing it raises TypeError
    # on current pandas.
    fund_code = pd.read_excel(file, index_col=[1])
    fund_code['上市日期'] = fund_code['上市日期'].map(str)

    fund_nav = Fund().get_fund_factor("Repair_Nav")
    index_close = Index().get_index_factor(index_code, attr=['CLOSE'])
    index_close.columns = [index_code]

    # Drop funds set up inside the window, then download fund size.
    fund_code = fund_code.loc[:, ['上市日期', '基金全称', '基金简称']]
    fund_code = fund_code[fund_code['上市日期'] < beg_date]

    fund_code_str = ','.join(fund_code.index)
    fund_asset = w.wss(fund_code_str, "netasset_total",
                       "unit=1;tradeDate=" + str(end_date))
    fund_asset = pd.DataFrame(fund_asset.Data, index=['基金规模'],
                              columns=fund_asset.Codes).T
    # Rescale by 1e8 and keep funds above 0.45 in the rescaled unit.
    fund_asset['基金规模'] /= 100000000.0
    fund_asset['基金规模'] = fund_asset['基金规模'].round(2)
    fund_asset = fund_asset[fund_asset['基金规模'] > 0.45]
    fund_info = pd.concat([fund_code, fund_asset], axis=1)
    fund_info = fund_info.dropna()

    # Metrics over the window.
    result = pd.DataFrame([], index=fund_code.index, columns=['跟踪误差'])
    # `.ix` with label lists produced NaN for missing labels; reindex()
    # is the equivalent on current pandas.
    fund_nav = fund_nav.reindex(index=index_close.index,
                                columns=fund_code.index)
    fund_pct = fund_nav.pct_change()
    index_pct = index_close.pct_change()[index_code]
    fund_excess_pct = fund_pct.sub(index_pct, axis='index')

    fund_excess_pct_period = fund_excess_pct.loc[beg_date:end_date, :]
    fund_nav_period = fund_nav.loc[beg_date:end_date, :]
    index_close_period = index_close.loc[beg_date:end_date, :]

    result.loc[:, "数据长度"] = fund_excess_pct_period.count()
    result.loc[:, "跟踪误差"] = fund_excess_pct_period.std() * np.sqrt(250)

    # Compound the daily returns through log space; cumsum() skips NaN
    # days, so gaps in a fund's NAV do not void its whole-window return.
    fund_return_log = np.log(
        fund_nav_period.pct_change() + 1.0).cumsum().iloc[-1, :]
    fund_return = np.exp(fund_return_log) - 1

    # fund_nav_period and index_close_period share the same index, so the
    # last NAV row is also the last index row.
    last_date_close = index_close_period.iloc[-1, :]
    first_date_close = index_close_period.iloc[0, :]

    result.loc[:, "基金涨跌"] = fund_return
    result.loc[:, "指数涨跌"] = (
        last_date_close / first_date_close - 1.0).values[0]
    result.loc[:, "超额收益"] = (
        result.loc[:, "基金涨跌"] - result.loc[:, "指数涨跌"])
    result.loc[:, "信息比率"] = (
        result.loc[:, "超额收益"] / result.loc[:, "跟踪误差"])

    result = result[result['数据长度'] > self.data_min_length]
    result = pd.concat([fund_info, result], axis=1)
    result = result.sort_values(by=['信息比率'], ascending=False)
    result = result.dropna()

    # Write to Excel.
    out_path = os.path.join(self.data_path, "cal_fund_factor", index_name)
    file_name = os.path.join(
        out_path, '基金指标_' + index_name + '_' + end_date + '.xlsx')

    # One-row frame mapping each output column to an Excel number format.
    num_format_pd = pd.DataFrame([], columns=result.columns,
                                 index=['format'])
    num_format_pd.loc['format', :] = '0.00%'
    num_format_pd.loc['format', '数据长度'] = '0'
    num_format_pd.loc['format', '信息比率'] = '0.00'
    num_format_pd.loc['format', '基金规模'] = '0.00'

    sheet_name = "基金指标"
    excel = WriteExcel(file_name)
    worksheet = excel.add_worksheet(sheet_name)
    excel.write_pandas(result, worksheet, begin_row_number=0,
                       begin_col_number=0, num_format_pd=num_format_pd,
                       color="red", fillna=True)
    # NOTE(review): the workbook is never explicitly closed here -- confirm
    # that WriteExcel flushes the file on its own.
def AttributeMfctedaFund(index_code_ratio, fund_code, index_code, fund_name,
                         beg_date, end_date, fund_id, path, type,
                         mg_fee_ratio):
    """
    Split a fund's daily NAV change over a period into its sources and
    write both the daily table and a summary table to Excel.

    Daily profit and loss is split into stock, new-stock, fee, intraday
    trading and a residual ("fixed income / other") part. The stock part
    is further split into benchmark, excess, timing and selection parts.
    The split is applied to each day's total P&L (previous-day net asset
    value times the fund's adjusted daily return), not to the NAV itself.

    Args:
        index_code_ratio: benchmark weight applied to the index return,
            e.g. 0.95.
        fund_code: fund code, e.g. '162216.OF' (used for public funds).
        index_code: benchmark index code, e.g. '000905.SH'.
        fund_name: fund name; also used for the output folder, file and
            sheet names.
        beg_date: start date, '%Y%m%d', e.g. '20180101'.
        end_date: end date, '%Y%m%d', e.g. '20180731'.
        fund_id: fund id passed to ``MfcData().get_fund_asset_period``.
        path: root directory for holdings input and Excel output.
        type: "专户" selects the adjusted-NAV source; any other value
            selects the public-fund return source. (The name shadows the
            builtin but is part of the existing call interface.)
        mg_fee_ratio: passed through to ``CalStockReturnDaily``.
    """
    # Adjusted daily fund return.
    beg_date = Date().get_trade_date_offset(beg_date, -0)

    if type == "专户":
        fund_pct = MfcData().get_fund_nav_adjust(
            fund_name, Date().get_trade_date_offset(beg_date, -2), end_date)
        fund_pct['基金涨跌幅'] = fund_pct['累计复权净值'].pct_change()
    else:
        fund_pct = MfcData().get_mfcteda_public_fund_pct_wind(
            fund_code, beg_date, end_date)
        fund_pct.columns = ['基金涨跌幅']

    # Index return and holdings data.
    # NAV = stock + bond + fund + repo assets + cash
    #       + accrued receivables - accrued payables,
    # where the accruals cover daily subscriptions/redemptions and fees not
    # yet settled to cash.
    index_pct = Index().get_index_factor(
        index_code, Date().get_trade_date_offset(beg_date, -1), end_date,
        ['CLOSE'])
    index_pct = index_pct.pct_change()
    index_pct.columns = ['指数涨跌幅']

    fund_asset = MfcData().get_fund_asset_period(fund_id, beg_date, end_date)
    close_unadjust = Stock().get_factor_h5("Price_Unadjust", None,
                                           "primary_mfc")
    adjust_factor = Stock().get_factor_h5("AdjustFactor", None,
                                          "primary_mfc")

    # Keep the reported stock asset; '股票资产' is overwritten per day below.
    fund_asset['股票资产-汇总'] = fund_asset['股票资产']
    data = pd.concat([fund_pct, index_pct, fund_asset], axis=1)
    data = data.dropna(subset=['基金涨跌幅', '指数涨跌幅'])
    data['昨日净值'] = data['净值'].shift(1)
    data['昨日基金份额'] = data['基金份额'].shift(1)
    data['昨日单位净值'] = data['单位净值'].shift(1)

    # Daily new-stock and stock P&L, at close price and at trade price.
    date_series = Date().get_trade_date_series(beg_date, end_date)

    for date in date_series:
        new_stock_return, new_stock_asset = CalNewStockReturnDaily(
            fund_name, date, path, close_unadjust, adjust_factor,
            cal_type="close")
        stock_return, mg_fee, trade_fee, stock_asset = CalStockReturnDaily(
            fund_name, date, path, close_unadjust, adjust_factor,
            mg_fee_ratio, cal_type="close")

        data.loc[date, '新股资产'] = new_stock_asset
        data.loc[date, '股票资产'] = stock_asset
        data.loc[date, '新股盈亏'] = new_stock_return
        data.loc[date, '股票盈亏'] = stock_return
        data.loc[date, '管理托管费用'] = mg_fee
        data.loc[date, '交易印花费用'] = trade_fee

        new_stock_return, new_stock_asset = CalNewStockReturnDaily(
            fund_name, date, path, close_unadjust, adjust_factor,
            cal_type="average")
        stock_return, mg_fee, trade_fee, stock_asset = CalStockReturnDaily(
            fund_name, date, path, close_unadjust, adjust_factor,
            mg_fee_ratio, cal_type="average")

        data.loc[date, '新股盈亏-TradePrice'] = new_stock_return
        data.loc[date, '股票盈亏-TradePrice'] = stock_return

    # The loop can append rows for dates that were dropped above (setting
    # with enlargement), so filter again.
    data = data.dropna(subset=['基金涨跌幅', '指数涨跌幅'])
    data[['新股盈亏', '当日股票总盈亏金额']] = \
        data[['新股盈亏', '当日股票总盈亏金额']].fillna(0.0)
    data = data[data['股票资产'] > 0.0]
    data['股票仓位'] = data['股票资产'] / data['净值']
    data['昨日股票仓位'] = data['股票仓位'].shift(1)

    # Asset P&L = stock + new stock + bond/other + fees.
    cols = ['管理托管费用', '交易印花费用', '股票盈亏', '新股盈亏']
    data[cols] = data[cols].fillna(0.0)
    data['汇总盈亏'] = (data['管理托管费用'] + data['交易印花费用']
                    + data['股票盈亏'] + data['新股盈亏'])
    data['日内交易盈亏'] = (data['股票盈亏'] - data['股票盈亏-TradePrice']
                      + data['新股盈亏'] - data['新股盈亏-TradePrice'])
    data['资产盈亏'] = data['基金涨跌幅'] * data['昨日净值']
    # Residual: whatever the named parts do not explain.
    data['固收其他盈亏'] = data['资产盈亏'] - data['汇总盈亏'] - data['日内交易盈亏']
    data['昨日股票资产'] = data['股票资产'].shift(1)
    data['股票涨跌幅'] = data['股票盈亏'] / data['昨日股票资产']

    # Stock P&L = benchmark P&L + excess P&L.
    data['基准盈亏'] = data['指数涨跌幅'] * data['昨日净值'] * index_code_ratio
    data['超额盈亏'] = (data['昨日股票仓位'] * data['股票涨跌幅'] * data['昨日净值']
                    - data['基准盈亏'])

    # Excess P&L = timing (asset allocation) + stock selection.
    data['择时盈亏'] = ((data['昨日股票仓位'] - index_code_ratio)
                    * data['指数涨跌幅'] * data['昨日净值'])
    data['选股盈亏'] = (data['昨日股票仓位']
                    * (data['股票涨跌幅'] - data['指数涨跌幅']) * data['昨日净值'])
    data['全仓选股盈亏'] = ((data['股票涨跌幅'] - data['指数涨跌幅'])
                      * data['昨日净值'])

    # Per-share values: divide by the previous day's share count. The
    # tuple order fixes the column order of the daily sheet.
    share_prev = data['昨日基金份额']
    for col in ('资产盈亏', '管理托管费用', '交易印花费用', '股票盈亏', '新股盈亏',
                '固收其他盈亏', '日内交易盈亏', '基准盈亏', '超额盈亏', '择时盈亏',
                '选股盈亏', '全仓选股盈亏'):
        data['净值-' + col] = data[col] / share_prev

    nav_cols = [
        '净值-资产盈亏', '净值-股票盈亏', '净值-新股盈亏', '净值-固收其他盈亏',
        '净值-日内交易盈亏', '净值-管理托管费用', '净值-交易印花费用',
        '净值-基准盈亏', '净值-超额盈亏', '净值-择时盈亏', '净值-选股盈亏',
        '净值-全仓选股盈亏'
    ]

    # Percentage shares and returns.
    data = data.dropna(subset=['昨日单位净值'])
    nav = data.loc[data.index[0], '昨日单位净值']
    pct = data['净值-资产盈亏'].sum() / nav

    result = pd.DataFrame([], columns=['净值变化', '百分比', '收益率'],
                          index=nav_cols)
    result.loc[nav_cols, '净值变化'] = data.loc[:, nav_cols].sum()
    result.loc[nav_cols, '百分比'] = (
        result.loc[nav_cols, '净值变化']
        / result.loc['净值-资产盈亏', '净值变化'])
    result.loc[nav_cols, '收益率'] = result.loc[nav_cols, '百分比'] * pct

    # Annualised return, start and end dates.
    result.index = [
        '基金整体', '股票部分', '新股部分', '固收+其他部分', "日内交易部分",
        '管理托管', '交易印花', '股票基准', '股票超额', '股票择时', '股票选股',
        '全仓股票选股'
    ]
    days = (datetime.strptime(end_date, '%Y%m%d')
            - datetime.strptime(beg_date, '%Y%m%d')).days
    result.loc[:, '年化收益'] = result.loc[:, '收益率'].map(
        lambda x: (x + 1) ** (365 / days) - 1.0)
    result.loc['股票仓位', :] = data['股票仓位'].mean()
    result.loc['开始时间', :] = data.index[0]
    result.loc['结束时间', :] = data.index[-1]

    begin_row_number = 0
    begin_col_number = 1
    color = "red"
    save_path = os.path.join(path, fund_name, "整体")
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_path, exist_ok=True)
    period_tag = str(data.index[0]) + '_' + str(data.index[-1])
    sheet_name = fund_name

    # Write the daily split. `.ix` was removed from pandas; these are
    # label lookups, so `.loc`.
    num_format_pd = pd.DataFrame([], columns=data.columns, index=['format'])
    num_format_pd.loc['format', :] = '0.00'
    num_format_pd.loc['format', ['基金涨跌幅', '指数涨跌幅', '股票仓位',
                                 '昨日股票仓位', '股票涨跌幅']] = '0.00%'
    # NOTE(review): '净值-日内交易盈亏' and '净值-超额盈亏' are not listed and
    # so keep the '0.00' format -- confirm whether that is intended.
    num_format_pd.loc['format', [
        '单位净值', '昨日单位净值', '净值-管理托管费用', '净值-交易印花费用',
        '净值-股票盈亏', '净值-新股盈亏', '净值-固收其他盈亏', '净值-基准盈亏',
        '净值-择时盈亏', '净值-选股盈亏', '净值-资产盈亏', '净值-全仓选股盈亏'
    ]] = '0.0000'

    file_name = os.path.join(
        save_path, "归因_" + fund_name + '_' + period_tag + ".xlsx")
    excel = WriteExcel(file_name)
    worksheet = excel.add_worksheet(sheet_name)
    excel.write_pandas(data, worksheet, begin_row_number=begin_row_number,
                       begin_col_number=begin_col_number,
                       num_format_pd=num_format_pd, color=color, fillna=True)
    excel.close()

    # Write the summary split.
    num_format_pd = pd.DataFrame([], columns=result.columns,
                                 index=['format'])
    num_format_pd.loc['format', :] = '0.00%'
    num_format_pd.loc['format', ['净值变化']] = '0.0000'

    file_name = os.path.join(
        save_path, "归因汇总_" + fund_name + '_' + period_tag + ".xlsx")
    excel = WriteExcel(file_name)
    worksheet = excel.add_worksheet(sheet_name)
    excel.write_pandas(result, worksheet, begin_row_number=begin_row_number,
                       begin_col_number=begin_col_number,
                       num_format_pd=num_format_pd, color=color, fillna=True)
    excel.close()
def back_test_timing_factor(self, factor_name, index_code):
    """
    Back-test a timing factor against the next-day return of an index.

    Saves a cumulative-return chart, a statistics table
    (``<factor_name>_Result.csv``) and the full working table
    (``<factor_name>_Return.csv``) under ``self.data_path``.

    Args:
        factor_name: name passed to ``self.get_factor_exposure``; the
            returned table must contain 'RawTimer' and 'Timer' columns.
        index_code: code of the index whose close-to-close return is
            traded.
    """
    def _ratio(numerator, denominator):
        # A factor that never goes long (or never short) has a zero
        # denominator; report NaN instead of raising ZeroDivisionError.
        return numerator / denominator if denominator else float('nan')

    data = self.get_factor_exposure(factor_name)
    index_pct = Index().get_index_factor(index_code, attr=['CLOSE'])
    index_pct = index_pct.pct_change()
    index_pct.columns = ['IndexReturn']

    data = pd.concat([data, index_pct['IndexReturn']], axis=1)
    data = data.dropna(subset=['RawTimer', 'Timer', 'IndexReturn'])
    # The signal on a row is applied to the following row's index return.
    data['IndexNextReturn'] = data['IndexReturn'].shift(-1)
    data['LongTimer'] = data['Timer'].map(lambda x: x if x >= 0 else 0)
    data['ShortTimer'] = data['Timer'].map(lambda x: x if x <= 0 else 0)

    data['SPortNextReturn'] = data['IndexNextReturn'] * data['ShortTimer']
    data['LPortNextReturn'] = data['IndexNextReturn'] * data['LongTimer']
    data['LSPortNextReturn'] = data['IndexNextReturn'] * data['Timer']

    data['SPortCumReturn'] = data['SPortNextReturn'].cumsum()
    data['LPortCumReturn'] = data['LPortNextReturn'].cumsum()
    data['LSPortCumReturn'] = data['LSPortNextReturn'].cumsum()
    data['IndexCumReturn'] = data['IndexReturn'].cumsum()

    col_output = [
        "SPortCumReturn", "LPortCumReturn", "LSPortCumReturn",
        "IndexCumReturn"
    ]
    ax = data[col_output].plot()
    fig = ax.get_figure()
    file = os.path.join(self.data_path, 'factor_picture',
                        factor_name + 'fig.png')
    fig.savefig(file)

    timer = data['Timer']
    next_ret = data['IndexNextReturn']
    is_long = timer > 0
    is_short = timer < 0
    is_flat = timer == 0
    long_win = is_long & (next_ret > 0)
    short_win = is_short & (next_ret < 0)

    pos_corr = next_ret.corr(timer)
    raw_corr = next_ret.corr(data['RawTimer'])

    mean_zero = next_ret[is_flat].mean()
    mean_positive_profit = next_ret[long_win].mean()
    mean_positive_loss = next_ret[is_long & (next_ret <= 0)].mean()
    # Short-side figures are negated so that a profit is positive.
    mean_negative_loss = -next_ret[is_short & (next_ret >= 0)].mean()
    mean_negative_profit = -next_ret[short_win].mean()

    number_positive = int(is_long.sum())
    number_negative = int(is_short.sum())
    number_zero = int(is_flat.sum())
    number_positive_profit = int(long_win.sum())
    number_negative_profit = int(short_win.sum())

    positive_wining_ratio = _ratio(number_positive_profit, number_positive)
    positive_profit_loss_ratio = -mean_positive_profit / mean_positive_loss
    negative_wining_ratio = _ratio(number_negative_profit, number_negative)
    negative_profit_loss_ratio = -mean_negative_profit / mean_negative_loss

    result = pd.DataFrame([], columns=[factor_name])
    result.loc['开始时间', factor_name] = data.index[0]
    result.loc['结束时间', factor_name] = data.index[-1]
    result.loc['仓位相关系数', factor_name] = pos_corr
    result.loc['原始相关系数', factor_name] = raw_corr
    result.loc['多头收益', factor_name] = mean_positive_profit
    result.loc['多头损失', factor_name] = mean_positive_loss
    result.loc['空头收益', factor_name] = mean_negative_profit
    result.loc['空头损失', factor_name] = mean_negative_loss
    result.loc['空仓收益', factor_name] = mean_zero
    result.loc['多头信号数量', factor_name] = number_positive
    result.loc['空头信号数量', factor_name] = number_negative
    result.loc['空仓信号数量', factor_name] = number_zero
    result.loc['多头胜率', factor_name] = positive_wining_ratio
    result.loc['多头盈亏比', factor_name] = positive_profit_loss_ratio
    result.loc['空头胜率', factor_name] = negative_wining_ratio
    result.loc['空头盈亏比', factor_name] = negative_profit_loss_ratio

    file = os.path.join(self.data_path, 'factor_backtest',
                        factor_name + '_Result.csv')
    result.to_csv(file)
    file = os.path.join(self.data_path, 'factor_backtest',
                        factor_name + '_Return.csv')
    data.to_csv(file)
def calculate_fund_factor_date(date, index_code, index_name, out_path):
    """
    Compute fund metrics for the year before ``date`` and the half year
    after it, and write them to Excel.

    For each fund in the filtered pool the function computes data length,
    tracking error, fund return, index return, excess return and
    information ratio over both windows.

    Args:
        date: reference date in '%Y%m%d' format, e.g. '20151231'.
        index_code: index code, e.g. '000905.SH'.
        index_name: index name used in the input/output file names,
            e.g. '中证500'.
        out_path: root directory holding ``filter_fund_pool`` (input) and
            ``cal_fund_factor`` (output).

    Returns:
        True once the workbook has been written.
    """
    # Dates.
    min_period = 200
    date_cur = datetime.strptime(date, "%Y%m%d")
    date_cur_int = date_cur.strftime('%Y%m%d')
    # DateOffset clips to the month end, so Feb 29 maps to Feb 28 instead
    # of raising ValueError as datetime(year - 1, month, day) does.
    date_bef_1y = (date_cur - pd.tseries.offsets.DateOffset(years=1)
                   ).strftime("%Y-%m-%d")
    date_aft_hy = (date_cur + pd.tseries.offsets.DateOffset(months=6, days=0)
                   ).strftime("%Y-%m-%d")
    # NOTE(review): `date` is '%Y%m%d' while both window bounds are
    # '%Y-%m-%d'. If the NAV/index tables are keyed by 'YYYYMMDD' strings
    # the slices below compare differently formatted labels -- confirm the
    # index type.

    # Load the fund pool, fund NAVs and index closes.
    file = os.path.join(out_path, 'filter_fund_pool',
                        '基金最终筛选池_' + index_name + '.xlsx')
    # read_excel has no `encoding` parameter; passing it raises TypeError
    # on current pandas.
    fund_code = pd.read_excel(file, index_col=[1])
    fund_nav = Fund().get_fund_factor("Repair_Nav")
    index_close = Index().get_index_factor(index_code, None, None,
                                           attr=['CLOSE'])
    index_close.columns = [index_code]

    # Drop funds set up within the last year, then download fund size.
    fund_code = fund_code.loc[:, ['上市日期', '基金简称']]
    fund_code = fund_code[fund_code['上市日期'] < date_bef_1y]

    fund_code_str = ','.join(fund_code.index)
    fund_asset = w.wss(fund_code_str, "netasset_total",
                       "unit=1;tradeDate=" + str(date))
    fund_asset = pd.DataFrame(fund_asset.Data, index=['基金规模'],
                              columns=fund_asset.Codes).T
    # Rescale by 1e8 and keep funds above 0.5 in the rescaled unit.
    fund_asset['基金规模'] /= 100000000.0
    fund_asset['基金规模'] = fund_asset['基金规模'].round(2)
    fund_asset = fund_asset[fund_asset['基金规模'] > 0.5]
    fund_info = pd.concat([fund_code, fund_asset], axis=1)
    fund_info = fund_info.dropna()

    # Metrics for the last year.
    result = pd.DataFrame([], index=fund_code.index,
                          columns=['最近1年跟踪误差'])
    # `.ix` with label lists produced NaN for missing labels; reindex()
    # is the equivalent on current pandas.
    fund_nav = fund_nav.reindex(index=index_close.index,
                                columns=fund_code.index)
    fund_pct = fund_nav.pct_change()
    index_pct = index_close.pct_change()[index_code]
    fund_excess_pct = fund_pct.sub(index_pct, axis='index')

    fund_excess_pct_period = fund_excess_pct.loc[date_bef_1y:date, :]
    fund_nav_period = fund_nav.loc[date_bef_1y:date, :]
    index_close_period = index_close.loc[date_bef_1y:date, :]

    result.loc[:, "最近1年数据长度"] = fund_excess_pct_period.count()
    result.loc[:, "最近1年跟踪误差"] = (
        fund_excess_pct_period.std() * np.sqrt(250))

    # Compound the daily returns through log space; cumsum() skips NaN
    # days, so gaps in a fund's NAV do not void its whole-window return.
    fund_return_log = np.log(
        fund_nav_period.pct_change() + 1.0).cumsum().iloc[-1, :]
    fund_return = np.exp(fund_return_log) - 1

    # The NAV and index slices share one index, so their last rows match.
    last_date_close = index_close_period.iloc[-1, :]
    first_date_close = index_close_period.iloc[0, :]

    result.loc[:, "最近1年基金涨跌"] = fund_return
    result.loc[:, "最近1年指数涨跌"] = (
        last_date_close / first_date_close - 1.0).values[0]
    result.loc[:, "最近1年超额收益"] = (
        result.loc[:, "最近1年基金涨跌"] - result.loc[:, "最近1年指数涨跌"])
    result.loc[:, "最近1年信息比率"] = (
        result.loc[:, "最近1年超额收益"] / result.loc[:, "最近1年跟踪误差"])
    # copy() so the column assignments below act on an independent frame.
    result = result[result['最近1年数据长度'] > min_period].copy()

    # Metrics for the following half year.
    fund_excess_pct_period = fund_excess_pct.loc[date:date_aft_hy, :]
    fund_nav_period = fund_nav.loc[date:date_aft_hy, :]
    index_close_period = index_close.loc[date:date_aft_hy, :]

    result.loc[:, "之后半年数据长度"] = fund_excess_pct_period.count()
    result.loc[:, "之后半年跟踪误差"] = (
        fund_excess_pct_period.std() * np.sqrt(250))

    # Best effort: the forward window can be empty (e.g. `date` is
    # recent), in which case the forward figures are left as NaN.
    try:
        fund_return_log = np.log(
            fund_nav_period.pct_change() + 1.0).cumsum().iloc[-1, :]
        fund_return = np.exp(fund_return_log) - 1
        result.loc[:, "之后半年基金涨跌"] = fund_return
    except Exception:
        result.loc[:, "之后半年基金涨跌"] = np.nan

    try:
        last_date_close = index_close_period.iloc[-1, :]
        first_date_close = index_close_period.iloc[0, :]
        result.loc[:, "之后半年指数涨跌"] = (
            last_date_close / first_date_close - 1.0).values[0]
    except Exception:
        result.loc[:, "之后半年指数涨跌"] = np.nan

    result.loc[:, "之后半年超额收益"] = (
        result.loc[:, "之后半年基金涨跌"] - result.loc[:, "之后半年指数涨跌"])
    result.loc[:, "之后半年信息比率"] = (
        result.loc[:, "之后半年超额收益"] / result.loc[:, "之后半年跟踪误差"])

    result = pd.concat([fund_info, result], axis=1)
    result = result.dropna(subset=["基金规模"])
    result = result.fillna("")

    # Write to Excel.
    save_dir = os.path.join(out_path, "cal_fund_factor", index_name)

    # One-row frame mapping each output column to an Excel number format.
    num_format_pd = pd.DataFrame([], columns=result.columns,
                                 index=['format'])
    num_format_pd.loc['format', :] = '0.00%'
    num_format_pd.loc['format', '之后半年数据长度'] = '0.00'
    num_format_pd.loc['format', '之后半年信息比率'] = '0.00'
    num_format_pd.loc['format', '基金规模'] = '0.00'
    num_format_pd.loc['format', '最近1年信息比率'] = '0.00'
    num_format_pd.loc['format', '最近1年数据长度'] = '0.00'

    begin_row_number = 0
    begin_col_number = 0
    color = "red"
    file_name = os.path.join(
        save_dir, '基金指标_' + index_name + '_' + date_cur_int + '.xlsx')
    sheet_name = "基金指标"

    write_pandas(file_name, sheet_name, begin_row_number, begin_col_number,
                 result, num_format_pd, color)
    return True
def filter_fund_pool(index_code, begin_date, end_date, min_period, ipo_date,
                     track_error_up, index_name, out_path):
    """
    Build a first-pass pool of funds tracking one index and write it to
    Excel.

    Funds are kept when, between ``begin_date`` and ``end_date``, they
    have more than ``min_period`` excess-return observations and a
    tracking error below ``track_error_up``. They must also mention
    ``index_name`` in their benchmark, be set up before ``ipo_date``, and
    not be an ETF or feeder fund (by full name).

    Args:
        index_code: index code, e.g. '000300.SH'.
        begin_date: first date of the window, e.g. '2017-05-31'.
        end_date: last date of the window, e.g. '2018-05-31'.
        min_period: exclusive lower bound on observation count, e.g. 200.
        ipo_date: funds set up on or after this '%Y-%m-%d' date are
            dropped.
        track_error_up: exclusive upper bound on tracking error.
        index_name: index name searched for in the benchmark text,
            e.g. '沪深300'; also used in the output file name.
        out_path: root output directory; the workbook goes into its
            ``filter_fund_pool`` sub-folder.
    """
    # Load data.
    fund_nav = Fund().get_fund_factor("Repair_Nav")
    index_close = Index().get_index_factor(index_code, None, None,
                                           attr=['CLOSE'])
    index_close.columns = [index_code]
    result = pd.DataFrame([], index=fund_nav.columns,
                          columns=['最近1年跟踪误差', '有效数据长度'])

    # Tracking error over the window. `.ix` was removed from pandas;
    # reindex() keeps its NaN-for-missing-label behaviour.
    fund_nav = fund_nav.reindex(index_close.index)
    fund_pct = fund_nav.pct_change()
    index_pct = index_close.pct_change()[index_code]
    fund_excess_pct = fund_pct.sub(index_pct, axis='index')
    fund_excess_pct_period = fund_excess_pct.loc[begin_date:end_date]
    result.loc[:, "有效数据长度"] = fund_excess_pct_period.count()
    # Daily standard deviation scaled by sqrt(250).
    result.loc[:, "最近1年跟踪误差"] = (
        fund_excess_pct_period.std() * np.sqrt(250))

    # Filter on data length and tracking error.
    result = result.dropna()
    result = result[result['有效数据长度'] > min_period]
    result = result[result['最近1年跟踪误差'] < track_error_up]

    code_str = ','.join(result.index)
    data = w.wss(code_str,
                 "fund_benchmark,fund_fullname,fund_setupdate,fund_investtype")
    data_pd = pd.DataFrame(data.Data, index=data.Fields,
                           columns=data.Codes).T
    data_pd.columns = ['基金基准', '基金全称', '上市日期', '基金类型']
    data_pd['上市日期'] = data_pd['上市日期'].map(
        lambda x: x.strftime('%Y-%m-%d'))
    result = pd.concat([data_pd, result], axis=1)

    # A missing benchmark or full name is not a string; treat it as a
    # non-match instead of letting `in` raise TypeError.
    result = result[result["基金基准"].map(
        lambda x: isinstance(x, str) and index_name in x)]
    result = result[result["上市日期"] < ipo_date]
    result = result[result["基金全称"].map(
        lambda x: isinstance(x, str) and "交易型开放式指数" not in x)]
    result = result[result["基金全称"].map(
        lambda x: isinstance(x, str) and "联接" not in x)]

    # Write the result.
    save_dir = os.path.join(out_path, "filter_fund_pool")
    # One-row frame mapping each output column to an Excel number format.
    num_format_pd = pd.DataFrame([], columns=result.columns,
                                 index=['format'])
    num_format_pd.loc['format', :] = '0.00'

    begin_row_number = 0
    begin_col_number = 1
    color = "red"
    file_name = os.path.join(save_dir, '基金初次筛选池_' + index_name + '.xlsx')
    sheet_name = "基金筛选池"

    write_pandas(file_name, sheet_name, begin_row_number, begin_col_number,
                 result, num_format_pd, color)