def cal_factor_barra_growth_5years_sales_growth(self, beg_date, end_date):
    """
    Build the 5-year sales-growth descriptor and store it raw and normalised.

    Operating income per share is summed over a rolling window of 4 report
    periods. Only the periods whose label characters [4:6] match those of
    the latest period are kept (presumably the month of a YYYYMMDD label,
    i.e. one observation per year - TODO confirm). ``self.slope`` is then
    applied over a rolling window of 5 of those observations and the result
    is expanded to daily frequency using disclosure dates.

    :param beg_date: start date handed to the quarter-to-daily conversion
    :param end_date: end date handed to the quarter-to-daily conversion
    """
    disclosure_dates = Stock().read_factor_h5("ReportDateDaily")
    revenue = Stock().read_factor_h5("OperatingIncomeTotal")
    shares = Stock().read_factor_h5("TotalShare") / 100000000

    # Re-sample the share count onto the regular date grid and carry values
    # forward along the date axis (at most 10 consecutive gaps).
    date_grid = Date().get_normal_date_series(shares.columns[0],
                                              shares.columns[-1])
    shares = shares.loc[:, date_grid]
    shares = shares.T.fillna(method='pad', limit=10).T
    shares, revenue = StockFactor().make_same_index_columns([shares, revenue])

    # Transposed so that the rolling windows run along the report periods.
    revenue_per_share = revenue.div(shares).T
    growth = revenue_per_share.rolling(4).sum()

    all_periods = growth.index
    anchor_month = all_periods[-1][4:6]
    yearly_periods = [p for p in all_periods if p[4:6] == anchor_month]

    growth = growth.loc[yearly_periods, :]
    growth = growth.rolling(5).apply(self.slope)

    # Back to every report period; the yearly value is carried forward over
    # at most 3 following periods.
    growth = growth.loc[all_periods, :]
    growth = growth.fillna(method='pad', limit=3).T
    growth = StockFactor().change_quarter_to_daily_with_disclosure_date(
        growth, disclosure_dates, beg_date, end_date)

    self.save_risk_factor_exposure(growth, self.raw_factor_name_5y_sale)
    growth = FactorPreProcess().remove_extreme_value_mad(growth)
    growth = FactorPreProcess().standardization(growth)
    self.save_risk_factor_exposure(growth, self.factor_name_5y_sale)
def cal_factor_barra_leverage_market_leverage(self):
    """
    Market leverage descriptor.

    market leverage = (market value of common equity
                       + book value of preferred equity
                       + book value of long-term debt)
                      / market value of common equity

    Missing long-term debt or preferred equity is treated as zero in the
    numerator (``fill_value=0.0``). The raw ratio and its MAD-trimmed,
    standardised version are both saved.
    """
    long_term_debt = Stock().read_factor_h5("LongTermLoanDaily")
    preferred = Stock().read_factor_h5("PreferredEquityDaily")
    common_shares = Stock().read_factor_h5("CommonShareDaily")
    close_raw = Stock().read_factor_h5("Price_Unadjust")

    aligned = FactorPreProcess().make_same_index_columns(
        [common_shares, close_raw])
    common_shares, close_raw = aligned
    equity_mv = common_shares.mul(close_raw)

    numerator = equity_mv.add(long_term_debt, fill_value=0.0)
    numerator = numerator.add(preferred, fill_value=0.0)

    leverage = numerator.div(equity_mv)
    # Drop dates (columns) that carry no value at all.
    leverage = leverage.T.dropna(how='all').T
    self.save_risk_factor_exposure(leverage,
                                   self.raw_factor_name_market_leverage)

    leverage = FactorPreProcess().remove_extreme_value_mad(leverage)
    leverage = FactorPreProcess().standardization(leverage)
    self.save_risk_factor_exposure(leverage, self.factor_name_market_leverage)
def cal_factor_exposure(self, beg_date=None, end_date=None):
    """
    Compute the factor exposure as the part of cubed size not explained by
    size.

    For every trade date between ``beg_date`` and ``end_date`` that also
    exists in the stored "cne5_normal_size" exposure, the cube of size is
    regressed (OLS with intercept) on size and the residual is kept. The
    residuals are MAD-trimmed, standardised and saved under
    ``self.factor_name``.

    :param beg_date: first date of the calculation window
    :param end_date: last date of the calculation window
    """
    size = self.get_risk_factor_exposure("cne5_normal_size")
    size_cubed = size ** 3

    trade_dates = Date().get_trade_date_series(beg_date, end_date)
    dates = sorted(set(trade_dates) & set(size.columns))

    residuals = pd.DataFrame([])
    for date in dates:
        print('Calculating Barra Risk factor %s at date %s' %
              (self.factor_name, date))

        sample = pd.concat([size[date], size_cubed[date]], axis=1)
        sample.columns = ['x', 'y']
        sample = sample.dropna()

        design = sm.add_constant(sample['x'].values)
        fit = sm.OLS(sample['y'].values, design).fit()
        sample['res'] = sample['y'] - fit.fittedvalues

        day_residual = pd.DataFrame(sample['res'])
        day_residual.columns = [date]
        residuals = pd.concat([residuals, day_residual], axis=1)

    residuals = residuals.T.dropna(how='all').T
    residuals = FactorPreProcess().remove_extreme_value_mad(residuals)
    residuals = FactorPreProcess().standardization(residuals)
    self.save_risk_factor_exposure(residuals, self.factor_name)
def get_standard_alpha_factor(self, factor_name):
    """
    Load an alpha factor and return it pre-processed.

    Pre-processing is MAD-based extreme-value removal followed by
    standardisation.

    :param factor_name: name of the stored alpha factor exposure
    :return: the pre-processed factor data
    """
    raw = self.get_alpha_factor_exposure(factor_name)
    trimmed = FactorPreProcess().remove_extreme_value_mad(raw)
    return FactorPreProcess().standardization(trimmed)
def cal_factor_barra_growth_short_term_predicted_earnings_growth(self):
    """
    Short-term predicted earnings growth (expected growth over the next year).

    Reads the stored "FEGR_1" data, saves it unchanged as the raw
    descriptor, then saves the MAD-trimmed and standardised version.
    """
    growth = Stock().read_factor_h5("FEGR_1")
    self.save_risk_factor_exposure(growth, self.raw_factor_name_short_term)

    trimmed = FactorPreProcess().remove_extreme_value_mad(growth)
    normalised = FactorPreProcess().standardization(trimmed)
    self.save_risk_factor_exposure(normalised, self.factor_name_short_term)
def cal_trailing_earnings_to_price_ratio(self):
    """
    Trailing earnings-to-price ratio (TTM net profit / total market value).

    Computed as the reciprocal of the stored "PE_ttm" data. Dates without
    any value are dropped; the raw and the normalised ratio are both saved.
    """
    earnings_yield = 1.0 / Stock().read_factor_h5("PE_ttm")
    earnings_yield = earnings_yield.T.dropna(how='all').T
    self.save_risk_factor_exposure(earnings_yield,
                                   self.raw_factor_name_trailing)

    trimmed = FactorPreProcess().remove_extreme_value_mad(earnings_yield)
    normalised = FactorPreProcess().standardization(trimmed)
    self.save_risk_factor_exposure(normalised, self.factor_name_trailing)
def cal_factor_liquidity_yearly(self):
    """
    Yearly liquidity descriptor: log of the summed turnover over the last
    252 trading days.

    The raw values and their MAD-trimmed, standardised version are saved.
    """
    window = 252
    turnover_daily = Stock().read_factor_h5("TurnOver_Daily").T

    # Vectorised log over the whole frame; replaces the per-cell
    # ``applymap(np.log)``, which is slow and deprecated in recent pandas.
    turnover_sum = turnover_daily.rolling(window=window).sum()
    turnover_period = np.log(turnover_sum)

    # After transposing back, rows that are entirely NaN are removed.
    turnover_period = turnover_period.T.dropna(how='all')
    self.save_risk_factor_exposure(turnover_period,
                                   self.raw_factor_name_yearly)

    turnover_period = FactorPreProcess().remove_extreme_value_mad(
        turnover_period)
    turnover_period = FactorPreProcess().standardization(turnover_period)
    self.save_risk_factor_exposure(turnover_period, self.factor_name_yearly)
def cal_factor_exposure(self, beg_date, end_date):
    """
    Liquidity factor.

    LIQUIDITY = 0.35 * monthly + 0.35 * quarterly + 0.30 * yearly turnover
    descriptor. The combination is then regressed on the size factor
    (OLS with intercept) date by date and the residual is kept as the
    final exposure.

    :param beg_date: first date of the regression window
    :param end_date: last date of the regression window
    """
    # Refresh the three component descriptors first.
    self.cal_factor_liquidity_month()
    self.cal_factor_liquidity_quarter()
    self.cal_factor_liquidity_yearly()

    month_part = 0.35 * self.get_risk_factor_exposure(self.factor_name_month)
    quarter_part = 0.35 * self.get_risk_factor_exposure(
        self.factor_name_quarter)
    year_part = 0.30 * self.get_risk_factor_exposure(self.factor_name_yearly)

    # A component that is missing counts as zero in the weighted sum.
    combined = month_part.add(quarter_part, fill_value=0.0)
    combined = combined.add(year_part, fill_value=0.0)
    combined = combined.T.dropna(how='all').T

    size = self.get_risk_factor_exposure("cne5_normal_size")
    size, combined = FactorPreProcess().make_same_index_columns(
        [size, combined])

    trade_dates = Date().get_trade_date_series(beg_date, end_date)
    dates = sorted(set(trade_dates) & set(combined.columns))

    residuals = pd.DataFrame([])
    for date in dates:
        print('Calculating Barra Risk factor %s at date %s' %
              (self.factor_name, date))

        sample = pd.concat([size[date], combined[date]], axis=1)
        sample.columns = ['x', 'y']
        sample = sample.dropna()

        design = sm.add_constant(sample['x'].values)
        fit = sm.OLS(sample['y'].values, design).fit()
        sample['res'] = sample['y'] - fit.fittedvalues

        day_residual = pd.DataFrame(sample['res'])
        day_residual.columns = [date]
        residuals = pd.concat([residuals, day_residual], axis=1)

    residuals = FactorPreProcess().remove_extreme_value_mad(residuals)
    residuals = FactorPreProcess().standardization(residuals)
    self.save_risk_factor_exposure(residuals, self.factor_name)
def cal_factor_barra_leverage_debt_to_asset(self):
    """
    Debt-to-asset descriptor: total liabilities / total assets.

    Dates without any value are dropped; the raw ratio and its MAD-trimmed,
    standardised version are both saved.
    """
    liabilities = Stock().read_factor_h5("TotalLiabilityDaily")
    assets = Stock().read_factor_h5('TotalAssetDaily')

    ratio = liabilities.div(assets).T.dropna(how='all').T
    self.save_risk_factor_exposure(ratio, self.raw_factor_name_debt_to_asset)

    trimmed = FactorPreProcess().remove_extreme_value_mad(ratio)
    normalised = FactorPreProcess().standardization(trimmed)
    self.save_risk_factor_exposure(normalised, self.factor_name_debt_to_asset)
def cal_factor_exposure(self, beg_date, end_date):
    """
    Compute the factor exposure: TTM net operating cash flow / total
    market value.

    The quarterly cash flow is converted to TTM, expanded to daily values
    using disclosure dates, and divided by total shares * unadjusted price
    / 1e8. The raw result is saved as an alpha factor.

    :param beg_date: start date handed to the quarter-to-daily conversion
    :param end_date: end date handed to the quarter-to-daily conversion
    """
    # Load inputs.
    cash_flow = Stock().read_factor_h5("NetOperateCashFlow")
    cash_flow_ttm = Stock().change_single_quarter_to_ttm_quarter(cash_flow)
    shares = Stock().read_factor_h5("TotalShare")
    close_raw = Stock().read_factor_h5("Price_Unadjust")
    disclosure_dates = Stock().read_factor_h5("ReportDateDaily")

    # Quarterly -> daily, then align everything on a common grid.
    cash_flow_ttm = Stock().change_quarter_to_daily_with_disclosure_date(
        cash_flow_ttm, disclosure_dates, beg_date, end_date)
    shares, close_raw = FactorPreProcess().make_same_index_columns(
        [shares, close_raw])
    market_value = shares.mul(close_raw) / 100000000
    cash_flow_ttm, market_value = Stock().make_same_index_columns(
        [cash_flow_ttm, market_value])

    # Ratio, without dates that have no value at all.
    ratio = cash_flow_ttm.div(market_value)
    ratio = ratio.T.dropna(how='all').T
    self.save_alpha_factor_exposure(ratio, self.raw_factor_name)
def cal_factor_exposure(self, beg_date, end_date):
    """
    Compute the factor exposure as an exponentially weighted sum of past
    log returns.

    For every trade date, log returns (in percent) are taken from the
    window between trade-date offsets ``-lag - window + 2`` and
    ``-lag + 1`` relative to that date. The window is used only when it
    holds more than ``min_period`` non-empty rows. Each row is multiplied
    by the weight from ``TimeSeriesWeight().exponential_weight`` and the
    rows are summed per stock. A stock with any missing return in the
    window (``skipna=False``) or with fewer than ``min_period``
    observations gets NaN.

    The raw values and their MAD-trimmed, standardised version are saved.

    :param beg_date: first date of the calculation window
    :param end_date: last date of the calculation window
    """
    # params
    lag, window, half_life, min_period = 21, 504, 126, 400

    # Daily percentage change -> log return, still expressed in percent.
    pct = Stock().read_factor_h5("Pct_chg").T
    pct = np.log(pct / 100.0 + 1.0) * 100

    daily_frames = []
    for current_date in Date().get_trade_date_series(beg_date, end_date):
        data_end = Date().get_trade_date_offset(current_date, -lag + 1)
        data_beg = Date().get_trade_date_offset(current_date,
                                                -lag - window + 2)
        pct_period = pct.loc[data_beg:data_end, :].dropna(how='all')

        if len(pct_period) <= min_period:
            print('Calculating Barra Risk factor %s at date %s is null' %
                  (self.factor_name, current_date))
            continue

        print('Calculating Barra Risk factor %s at date %s' %
              (self.factor_name, current_date))
        count = pct_period.count()

        # One weight per row, broadcast across all stocks. This replaces
        # the deprecated np.row_stack and the explicitly tiled matrix.
        weight = TimeSeriesWeight().exponential_weight(len(pct_period),
                                                       half_life)
        weight = np.asarray(weight).reshape(-1)
        pct_weight = pct_period.mul(weight, axis=0)

        mon = pd.DataFrame(pct_weight.sum(skipna=False))
        mon[count < min_period] = np.nan
        mon.columns = [current_date]
        daily_frames.append(mon)

    # A single concat instead of re-copying the result on every date.
    if daily_frames:
        res_data = pd.concat(daily_frames, axis=1)
    else:
        res_data = pd.DataFrame([])

    res_data = res_data.T.dropna(how='all').T
    self.save_risk_factor_exposure(res_data, self.raw_factor_name)
    res_data = FactorPreProcess().remove_extreme_value_mad(res_data)
    res_data = FactorPreProcess().standardization(res_data)
    self.save_risk_factor_exposure(res_data, self.factor_name)
def cal_predicted_earnings_to_price_ratio(self):
    """
    Predicted earnings-to-price ratio (predicted earnings / market value).

    Computed as the stored "FE_1" data divided by the unadjusted price
    (presumably FE_1 is a per-share forecast - TODO confirm). Dates without
    any value are dropped; the raw and the normalised ratio are both saved.
    """
    forecast = Stock().read_factor_h5("FE_1")
    close_raw = Stock().read_factor_h5("Price_Unadjust")
    aligned = FactorPreProcess().make_same_index_columns(
        [forecast, close_raw])
    forecast, close_raw = aligned

    ratio = forecast.div(close_raw).T.dropna(how='all').T
    self.save_risk_factor_exposure(ratio, self.raw_factor_name_predicted)

    trimmed = FactorPreProcess().remove_extreme_value_mad(ratio)
    normalised = FactorPreProcess().standardization(trimmed)
    self.save_risk_factor_exposure(normalised, self.factor_name_predicted)
def cal_factor_exposure(self):
    """
    Compute the factor exposure: natural log of total market value.

    Total market value is unadjusted price * total shares / 1e8. The raw
    log value and its MAD-trimmed, standardised version are both saved.
    """
    # Load and align inputs.
    close_raw = Stock().read_factor_h5("Price_Unadjust")
    shares = Stock().read_factor_h5("TotalShare")
    close_raw, shares = FactorPreProcess().make_same_index_columns(
        [close_raw, shares])

    market_value = close_raw.mul(shares) / 100000000
    log_size = np.log(market_value)

    # Persist raw, then normalised.
    self.save_risk_factor_exposure(log_size, self.raw_factor_name)
    trimmed = FactorPreProcess().remove_extreme_value_mad(log_size)
    normalised = FactorPreProcess().standardization(trimmed)
    self.save_risk_factor_exposure(normalised, self.factor_name)
def cal_factor_barra_leverage_book_leverage(self):
    """
    Book leverage descriptor.

    book leverage = (book value of common equity
                     + book value of preferred equity
                     + long-term debt)
                    / book value of common equity

    In the code the numerator is total shareholder equity plus long-term
    debt, and the denominator is total shareholder equity minus preferred
    equity. The raw ratio and its normalised version are both saved.
    """
    total_equity = Stock().read_factor_h5("TotalShareHoldeRequityDaily")
    preferred = Stock().read_factor_h5("PreferredEquityDaily")
    # NOTE(review): no fill_value here, so a missing preferred-equity entry
    # makes the denominator NaN - confirm this is intended.
    common_book = total_equity.sub(preferred)
    long_term_debt = Stock().read_factor_h5("LongTermLoanDaily")

    # Missing long-term debt counts as zero in the numerator.
    numerator = total_equity.add(long_term_debt, fill_value=0.0)
    leverage = numerator.div(common_book).T.dropna(how='all').T
    self.save_risk_factor_exposure(leverage,
                                   self.raw_factor_name_book_leverage)

    trimmed = FactorPreProcess().remove_extreme_value_mad(leverage)
    normalised = FactorPreProcess().standardization(trimmed)
    self.save_risk_factor_exposure(normalised, self.factor_name_book_leverage)
def cal_factor_exposure(self):
    """
    Composite leverage factor.

    Recomputes the three descriptors, then combines their normalised
    exposures as 0.35 * debt-to-asset + 0.38 * market leverage
    + 0.27 * book leverage (a missing descriptor counts as zero). The sum
    is MAD-trimmed, standardised and saved under ``self.factor_name``.
    """
    # Refresh the descriptors.
    self.cal_factor_barra_leverage_debt_to_asset()
    self.cal_factor_barra_leverage_market_leverage()
    self.cal_factor_barra_leverage_book_leverage()

    dta_part = 0.35 * self.get_risk_factor_exposure(
        self.factor_name_debt_to_asset)
    mlev_part = 0.38 * self.get_risk_factor_exposure(
        self.factor_name_market_leverage)
    blev_part = 0.27 * self.get_risk_factor_exposure(
        self.factor_name_book_leverage)

    composite = dta_part.add(mlev_part, fill_value=0.0)
    composite = composite.add(blev_part, fill_value=0.0)

    composite = FactorPreProcess().remove_extreme_value_mad(composite)
    composite = FactorPreProcess().standardization(composite)
    self.save_risk_factor_exposure(composite, self.factor_name)
def cal_factor_exposure(self, beg_date, end_date):
    """
    Composite growth factor.

    Recomputes the four descriptors, then combines their normalised
    exposures as 0.18 * long-term predicted growth + 0.11 * short-term
    predicted growth + 0.24 * 5-year profit growth + 0.47 * 5-year sales
    growth (a missing descriptor counts as zero). The sum is MAD-trimmed,
    standardised and saved under ``self.factor_name``.

    :param beg_date: start date passed to the 5-year growth descriptors
    :param end_date: end date passed to the 5-year growth descriptors
    """
    # Refresh the descriptors.
    self.cal_factor_barra_growth_long_term_predicted_earnings_growth()
    self.cal_factor_barra_growth_short_term_predicted_earnings_growth()
    self.cal_factor_barra_growth_5years_profit_growth(beg_date, end_date)
    self.cal_factor_barra_growth_5years_sales_growth(beg_date, end_date)

    long_part = 0.18 * self.get_risk_factor_exposure(
        self.factor_name_long_term)
    short_part = 0.11 * self.get_risk_factor_exposure(
        self.factor_name_short_term)
    profit_part = 0.24 * self.get_risk_factor_exposure(
        self.factor_name_5y_profit)
    sales_part = 0.47 * self.get_risk_factor_exposure(
        self.factor_name_5y_sale)

    composite = long_part.add(short_part, fill_value=0.0)
    for part in (profit_part, sales_part):
        composite = composite.add(part, fill_value=0.0)

    composite = FactorPreProcess().remove_extreme_value_mad(composite)
    composite = FactorPreProcess().standardization(composite)
    self.save_risk_factor_exposure(composite, self.factor_name)
def cal_stock_covariance(self, date):
    """
    Compute the stock covariance matrix for one date and write it to CSV.

    sigma = X F X' + S, where X is the stock-by-factor exposure matrix,
    F the factor covariance and S the diagonal matrix of squared residual
    risk. The result is written to
    ``<self.data_path>/StockCovariance/StockCovariance_<date>.csv``.
    Nothing is written when no exposure is available for the date.

    :param date: date for which the covariance is computed
    """
    factor_covariance = self.get_factor_covariance(date)
    exposure = self.get_factor_exposure_date(
        date, type_list=['COUNTRY', 'STYLE', 'INDUSTRY'])

    if exposure is None or len(exposure) == 0:
        print("Exposure is None %s" % date)
        return

    # Put the factor columns in the same order as the covariance matrix.
    exposure = exposure[factor_covariance.columns]

    # Specific part: squared residual risk on the diagonal.
    residual_risk = self.get_stock_residual_risk()
    residual_codes = residual_risk.index.values
    residual_var_diag = pd.DataFrame(
        np.diag((residual_risk[date] ** 2).values),
        index=residual_codes, columns=residual_codes)

    # Common part: X F X'.
    exposure_codes = exposure.index.values
    public_var = pd.DataFrame(
        np.dot(np.dot(exposure.values, factor_covariance.values),
               exposure.T.values),
        index=exposure_codes, columns=exposure_codes)

    residual_var_diag, public_var = FactorPreProcess(
    ).make_same_index_columns([residual_var_diag, public_var])
    total_cov = residual_var_diag.add(public_var)

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    path = os.path.join(self.data_path, 'StockCovariance')
    os.makedirs(path, exist_ok=True)

    print("Cal Stock Covariance Daily is %s" % date)
    file = os.path.join(path, "StockCovariance_%s.csv" % date)
    total_cov.to_csv(file)
def cal_cash_earnings_to_price_ratio(self, beg_date, end_date):
    """
    Cash earnings-to-price ratio: TTM net operating cash flow / total
    market value.

    The quarterly cash flow is converted to TTM and expanded to daily
    values using disclosure dates. Total market value is total shares *
    unadjusted price / 1e8. The raw and the normalised ratio are saved.

    :param beg_date: start date handed to the quarter-to-daily conversion
    :param end_date: end date handed to the quarter-to-daily conversion
    """
    cash_flow = Stock().read_factor_h5("NetOperateCashFlow")
    disclosure_dates = Stock().read_factor_h5("ReportDateDaily")
    cash_flow = Stock().change_single_quarter_to_ttm_quarter(cash_flow)
    cash_flow = Stock().change_quarter_to_daily_with_disclosure_date(
        cash_flow, disclosure_dates, beg_date, end_date)

    shares = Stock().read_factor_h5("TotalShare")
    close_raw = Stock().read_factor_h5("Price_Unadjust")
    cash_flow, shares, close_raw = FactorPreProcess().make_same_index_columns(
        [cash_flow, shares, close_raw])

    market_value = shares.mul(close_raw) / 100000000
    ratio = cash_flow.div(market_value).T.dropna(how='all').T
    self.save_risk_factor_exposure(ratio, self.raw_factor_name_cash)

    trimmed = FactorPreProcess().remove_extreme_value_mad(ratio)
    normalised = FactorPreProcess().standardization(trimmed)
    self.save_risk_factor_exposure(normalised, self.factor_name_cash)
def cal_factor_barra_growth_5years_profit_growth(self, beg_date, end_date):
    """
    Build the 5-year profit-growth descriptor and store it raw and
    normalised.

    Basic EPS is summed over a rolling window of 4 report periods. Only the
    periods whose label characters [4:6] match those of the latest period
    are kept (presumably the month of a YYYYMMDD label, i.e. one
    observation per year - TODO confirm). ``self.slope`` is then applied
    over a rolling window of 5 of those observations and the result is
    expanded to daily frequency using disclosure dates.

    :param beg_date: start date handed to the quarter-to-daily conversion
    :param end_date: end date handed to the quarter-to-daily conversion
    """
    disclosure_dates = Stock().read_factor_h5("ReportDateDaily")
    # Transposed so that the rolling windows run along the report periods.
    eps = Stock().read_factor_h5("EPS_basic").T
    ttm = eps.rolling(4).sum()

    all_periods = ttm.index
    anchor_month = all_periods[-1][4:6]
    yearly_periods = [p for p in all_periods if p[4:6] == anchor_month]

    yearly_ttm = ttm.loc[yearly_periods, :]
    growth = yearly_ttm.rolling(5).apply(self.slope)

    # Back to every report period; the yearly value is carried forward over
    # at most 3 following periods.
    growth = growth.loc[all_periods, :]
    growth = growth.fillna(method='pad', limit=3).T
    growth = StockFactor().change_quarter_to_daily_with_disclosure_date(
        growth, disclosure_dates, beg_date, end_date)

    self.save_risk_factor_exposure(growth, self.raw_factor_name_5y_profit)
    growth = FactorPreProcess().remove_extreme_value_mad(growth)
    growth = FactorPreProcess().standardization(growth)
    self.save_risk_factor_exposure(growth, self.factor_name_5y_profit)
def cal_factor_exposure(self, beg_date, end_date):
    """
    Composite earnings-yield factor.

    Original weights: 0.68 * predicted earnings (next year) / market value
    + 0.21 * TTM net operating cash flow / market value
    + 0.11 * TTM net profit / market value.
    Because forecast data quality for A-shares is low, the weights used
    here are 0.50 / 0.30 / 0.20 instead.

    A missing descriptor counts as zero; the sum is MAD-trimmed,
    standardised and saved under ``self.factor_name``.

    :param beg_date: start date passed to the cash-earnings descriptor
    :param end_date: end date passed to the cash-earnings descriptor
    """
    # Refresh the descriptors.
    self.cal_predicted_earnings_to_price_ratio()
    self.cal_cash_earnings_to_price_ratio(beg_date, end_date)
    self.cal_trailing_earnings_to_price_ratio()

    predicted_part = 0.50 * self.get_risk_factor_exposure(
        self.factor_name_predicted)
    cash_part = 0.30 * self.get_risk_factor_exposure(self.factor_name_cash)
    trailing_part = 0.20 * self.get_risk_factor_exposure(
        self.factor_name_trailing)

    composite = predicted_part.add(cash_part, fill_value=0.0)
    composite = composite.add(trailing_part, fill_value=0.0)

    composite = FactorPreProcess().remove_extreme_value_mad(composite)
    composite = FactorPreProcess().standardization(composite)
    self.save_risk_factor_exposure(composite, self.factor_name)
def get_data_real(self):
    """
    Assemble one cross-section of returns, alpha, style and industry data.

    The date ("20171229") and the alpha factor ("ROEQuarterDaily") are
    hard-coded. The return is measured from that date to the trade date at
    offset 40. Rows with any missing value are dropped, and the alpha and
    style columns are standardised.

    :return: tuple ``(data, style_columns, industry_columns)``. ``data``
        has the columns 'pct', 'alpha', 'size', 'beta', 'nolin_size'
        followed by one dummy column per industry.
    """
    # Parameters.
    date = "20171229"
    factor_name = "ROEQuarterDaily"
    next_date = Date().get_trade_date_offset(date, 40)

    # Inputs.
    price = Stock().read_factor_h5("PriceCloseAdjust")
    alpha_val = Stock().read_factor_h5(factor_name,
                                       Stock().get_h5_path("my_alpha"))
    size = Stock().read_factor_h5("NORMAL_CNE5_SIZE",
                                  Stock().get_h5_path("my_barra_risk"))
    beta = Stock().read_factor_h5("NORMAL_CNE5_BETA",
                                  Stock().get_h5_path("my_barra_risk"))
    nolin_size = Stock().read_factor_h5("NORMAL_CNE5_NON_LINEAR_SIZE",
                                        Stock().get_h5_path("my_barra_risk"))
    industry = Stock().read_factor_h5("industry_citic1")
    forward_return = pd.DataFrame(price[next_date] / price[date] - 1.0)

    # Industry dummies named 'industry_<code>'.
    industry_dummy = pd.get_dummies(industry[date])
    industry_columns = ['industry_' + str(int(code))
                        for code in industry_dummy.columns]
    industry_dummy.columns = industry_columns

    # One frame with complete rows only.
    pieces = [forward_return, alpha_val[date], size[date], beta[date],
              nolin_size[date], industry_dummy]
    data = pd.concat(pieces, axis=1).dropna()

    style_columns = ['size', 'beta', 'nolin_size']
    data.columns = ['pct', 'alpha', 'size', 'beta', 'nolin_size'] \
        + industry_columns

    to_standardise = ['alpha', 'size', 'beta', 'nolin_size']
    data[to_standardise] = FactorPreProcess().standardization(
        data[to_standardise])

    return data, style_columns, industry_columns
def cal_factor_exposure(self):
    """
    Compute the factor exposure: total shareholder equity / total market
    value.

    Total market value is total shares * unadjusted price (used unscaled
    here). Dates without any value are dropped; the raw ratio and its
    MAD-trimmed, standardised version are both saved.
    """
    # Load inputs.
    equity = Stock().read_factor_h5("TotalShareHoldeRequityDaily")
    shares = Stock().read_factor_h5("TotalShare")
    close_raw = Stock().read_factor_h5("Price_Unadjust")

    # Align and build the ratio.
    shares, close_raw = FactorPreProcess().make_same_index_columns(
        [shares, close_raw])
    market_value = shares.mul(close_raw)
    equity, market_value = Stock().make_same_index_columns(
        [equity, market_value])
    book_to_price = equity.div(market_value).T.dropna(how='all').T

    # Persist raw, then normalised.
    self.save_risk_factor_exposure(book_to_price, self.raw_factor_name)
    trimmed = FactorPreProcess().remove_extreme_value_mad(book_to_price)
    normalised = FactorPreProcess().standardization(trimmed)
    self.save_risk_factor_exposure(normalised, self.factor_name)
def change_name(path, dsname, change_dsname):
    """
    Rename an HDF factor dataset and rewrite it in normalised form.

    Steps:
    1. rename ``<path>/<dsname>.h5`` to ``<path>/<change_dsname>.h5``;
    2. write the correlation matrix of the month-end columns
       (20060101 - 20180609) to ``<change_dsname>_MonthCorr.csv``;
    3. apply ``inv_normalization`` then ``standardization`` to the data,
       and write the result to ``<change_dsname>.csv`` and back into the
       renamed HDF file.

    :param path: directory holding the HDF files
    :param dsname: current dataset / file name (without extension)
    :param change_dsname: new dataset / file name (without extension)
    """
    from quant.utility.hdf_mfc import HdfMfc
    from quant.utility.factor_preprocess import FactorPreProcess
    import os

    old_file = os.path.join(path, dsname + '.h5')
    new_file = os.path.join(path, change_dsname + '.h5')

    HdfMfc(old_file, dsname).rename(dsname, new_file, change_dsname)
    factor = HdfMfc(new_file, change_dsname).read_hdf_factor(change_dsname)

    # Month-end correlation snapshot.
    month_ends = Date().get_trade_date_series("20060101", "20180609", "M")
    month_corr = factor[month_ends].corr()
    month_corr.to_csv(os.path.join(path, change_dsname + '_MonthCorr.csv'))

    # Normalise and persist.
    inverse_normal = FactorPreProcess().inv_normalization(factor)
    factor = FactorPreProcess().standardization(inverse_normal)
    factor.to_csv(os.path.join(path, change_dsname + '.csv'))
    HdfMfc(old_file, dsname).write_hdf_factor(new_file, change_dsname, factor)
def cal_factor_exposure(self, beg_date, end_date):
    """
    Compute the factor exposure: 4 * cash equivalents / total market value.

    Cash equivalents are expanded from quarterly to daily values using
    disclosure dates. Total market value is total shares * unadjusted
    price. The raw result is saved as an alpha factor.

    :param beg_date: start date handed to the quarter-to-daily conversion
    :param end_date: end date handed to the quarter-to-daily conversion
    """
    # Load inputs.
    cash_equiv = Stock().read_factor_h5("CashEquivalents")
    shares = Stock().read_factor_h5("TotalShare")
    close_raw = Stock().read_factor_h5("Price_Unadjust")
    disclosure_dates = Stock().read_factor_h5("ReportDateDaily")

    # Quarterly -> daily, then align on a common grid.
    cash_equiv = Stock().change_quarter_to_daily_with_disclosure_date(
        cash_equiv, disclosure_dates, beg_date, end_date)
    shares, close_raw = FactorPreProcess().make_same_index_columns(
        [shares, close_raw])
    market_value = shares.mul(close_raw)
    cash_equiv, market_value = Stock().make_same_index_columns(
        [cash_equiv, market_value])

    ratio = 4 * cash_equiv.div(market_value)

    # Save without dates that have no value at all.
    ratio = ratio.T.dropna(how='all').T
    self.save_alpha_factor_exposure(ratio, self.raw_factor_name)
def cal_factor_exposure(self, beg_date, end_date):
    """
    Compute the factor exposure: total shareholder equity / total market
    value.

    Shareholder equity is expanded from quarterly to daily values using
    disclosure dates. Total market value is total shares * unadjusted
    price. The raw result is saved as an alpha factor.

    :param beg_date: start date handed to the quarter-to-daily conversion
    :param end_date: end date handed to the quarter-to-daily conversion
    """
    # Load inputs.
    equity = Stock().read_factor_h5("TotalShareHoldeRequity")
    shares = Stock().read_factor_h5("TotalShare")
    close_raw = Stock().read_factor_h5("Price_Unadjust")
    disclosure_dates = Stock().read_factor_h5("ReportDateDaily")

    # Quarterly -> daily, then align on a common grid.
    equity = Stock().change_quarter_to_daily_with_disclosure_date(
        equity, disclosure_dates, beg_date, end_date)
    shares, close_raw = FactorPreProcess().make_same_index_columns(
        [shares, close_raw])
    market_value = shares.mul(close_raw)
    equity, market_value = Stock().make_same_index_columns(
        [equity, market_value])

    book_to_price = equity.div(market_value)

    # Save without dates that have no value at all.
    book_to_price = book_to_price.T.dropna(how='all').T
    self.save_alpha_factor_exposure(book_to_price, self.raw_factor_name)
def cal_factor_exposure(self, beg_date, end_date):
    """
    Compute the factor exposure: TTM operating income / total market value.

    Operating income is converted to TTM and expanded to daily values
    using disclosure dates. Total market value is total shares *
    unadjusted price / 1e8. The raw result is saved as an alpha factor.

    :param beg_date: start date handed to the quarter-to-daily conversion
    :param end_date: end date handed to the quarter-to-daily conversion
    """
    # Load inputs and bring income to daily TTM values.
    shares = Stock().read_factor_h5("TotalShare")
    close_raw = Stock().read_factor_h5("Price_Unadjust")
    revenue = Stock().read_factor_h5("OperatingIncome")
    revenue = Stock().change_single_quarter_to_ttm_quarter(revenue)
    disclosure_dates = Stock().read_factor_h5("ReportDateDaily")
    revenue = Stock().change_quarter_to_daily_with_disclosure_date(
        revenue, disclosure_dates, beg_date, end_date)

    # Align on a common grid and build the ratio.
    shares, close_raw = FactorPreProcess().make_same_index_columns(
        [shares, close_raw])
    market_value = shares.mul(close_raw) / 100000000
    revenue, market_value = Stock().make_same_index_columns(
        [revenue, market_value])
    sales_to_price = revenue.div(market_value)

    # Save without dates that have no value at all.
    sales_to_price = sales_to_price.T.dropna(how='all').T
    self.save_alpha_factor_exposure(sales_to_price, self.raw_factor_name)
def cal_factor_exposure(self, beg_date, end_date):
    """
    Compute the factor exposure: 4 * (operating income - operating cost)
    / total market value.

    The income-minus-cost series is expanded from quarterly to daily
    values using disclosure dates. Total market value is total shares *
    unadjusted price / 1e8. The raw result is saved as an alpha factor.

    :param beg_date: start date handed to the quarter-to-daily conversion
    :param end_date: end date handed to the quarter-to-daily conversion
    """
    # Load inputs.
    revenue = Stock().read_factor_h5("OperatingIncome")
    expense = Stock().read_factor_h5("OperatingCost")
    shares = Stock().read_factor_h5("TotalShare")
    close_raw = Stock().read_factor_h5("Price_Unadjust")
    disclosure_dates = Stock().read_factor_h5("ReportDateDaily")
    gross_profit = revenue.sub(expense)

    # Quarterly -> daily, then align on a common grid.
    gross_profit = Stock().change_quarter_to_daily_with_disclosure_date(
        gross_profit, disclosure_dates, beg_date, end_date)
    shares, close_raw = FactorPreProcess().make_same_index_columns(
        [shares, close_raw])
    market_value = shares.mul(close_raw) / 100000000
    gross_profit, market_value = Stock().make_same_index_columns(
        [gross_profit, market_value])

    ratio = 4 * gross_profit.div(market_value)

    # Save without dates that have no value at all.
    ratio = ratio.T.dropna(how='all').T
    self.save_alpha_factor_exposure(ratio, self.raw_factor_name)
def cal_factor_exposure(self, beg_date, end_date):
    """
    Compute the factor exposure: 4 * retained earnings / total market
    value.

    Retained earnings are expanded from quarterly to daily values using
    disclosure dates. Total market value is total shares * unadjusted
    price / 1e8. The raw result is saved as an alpha factor.

    :param beg_date: start date handed to the quarter-to-daily conversion
    :param end_date: end date handed to the quarter-to-daily conversion
    """
    # Load inputs.
    retained = Stock().read_factor_h5("RetainedEarnings")
    shares = Stock().read_factor_h5("TotalShare")
    close_raw = Stock().read_factor_h5("Price_Unadjust")
    disclosure_dates = Stock().read_factor_h5("ReportDateDaily")

    # Quarterly -> daily, then align on a common grid.
    retained = Stock().change_quarter_to_daily_with_disclosure_date(
        retained, disclosure_dates, beg_date, end_date)
    shares, close_raw = FactorPreProcess().make_same_index_columns(
        [shares, close_raw])
    market_value = shares.mul(close_raw) / 100000000
    retained, market_value = Stock().make_same_index_columns(
        [retained, market_value])

    ratio = 4 * retained.div(market_value)

    # Save without dates that have no value at all.
    ratio = ratio.T.dropna(how='all').T
    self.save_alpha_factor_exposure(ratio, self.raw_factor_name)
def get_data_date(self, date):
    """
    Collect the alpha, industry, style and free-market-value data for one
    date.

    The alpha column is the latest one in ``self.alpha_data`` whose label
    is not later than ``date``. As a side effect the method sets
    ``self.industry_factor_name``, ``self.style_factor_name`` and
    ``self.risk_factor_name`` (industry names followed by style names).

    :param date: date of the cross-section
    :return: tuple ``(alpha_date, barra_industry_date, barra_style_date,
        free_mv_date)``
    """
    # Alpha: most recent column at or before `date`.
    usable_dates = [d for d in self.alpha_data.columns if d <= date]
    alpha_date = pd.DataFrame(self.alpha_data[max(usable_dates)])
    alpha_date.columns = [self.alpha_factor_name]

    # Industry exposures.
    risk_factor_name = []
    barra_industry_date = Barra().get_factor_exposure_date(
        date=date, type_list=['INDUSTRY'])
    industry_columns = barra_industry_date.columns
    risk_factor_name.extend(industry_columns)
    self.industry_factor_name = industry_columns
    self.risk_factor_name = risk_factor_name

    # Style exposures, standardised.
    barra_style_date = Barra().get_factor_exposure_date(
        date=date, type_list=['STYLE'])
    barra_style_date = FactorPreProcess().standardization(barra_style_date)
    style_columns = barra_style_date.columns
    risk_factor_name.extend(style_columns)
    self.style_factor_name = style_columns
    self.risk_factor_name = risk_factor_name

    # Free-float market value.
    free_mv_date = pd.DataFrame(self.free_mv_data[date])
    free_mv_date.columns = ['FreeMv']

    return alpha_date, barra_industry_date, barra_style_date, free_mv_date
def cal_fmp(self, fmp_name, type="Equal"):
    """
    Build factor-mimicking portfolios (FMP) and write them to CSV.

    For every date in ``self.change_date_series`` except the last one, the
    alpha factor is MAD-trimmed and standardised, and the quadratic program

        minimise    1/2 * w' P w
        subject to  alpha' w = 1  and  sum(w) = 0

    is solved for the weights ``w``. Dates with at most
    ``self.min_stock_num`` stocks are skipped. A date whose optimisation
    fails contributes an all-NaN row. The result is written to
    ``<self.path>/fmp/<alpha_factor_name>_<fmp_name>_<type>.csv``.

    :param fmp_name: label used in the output file name
    :param type: choice of ``P``:
        'Equal'         - identity matrix;
        'BarraStockCov' - Barra stock covariance matrix for the date.
        'FreeMvSqrt' (square root of free-float market value) was
        documented but has no implementation here and is rejected.
    :raises ValueError: if ``type`` is not one of the supported values
    """
    if type not in ('Equal', 'BarraStockCov'):
        # Previously this surfaced later as an unbound-variable error.
        raise ValueError("Unsupported FMP type: %s" % type)

    fmp_rows = []
    for date in self.change_date_series[:-1]:

        # Alpha for the date, pre-processed and sorted by stock code.
        alpha_date, _industry, _style, _free_mv = self.get_data_date(date)
        alpha_date = alpha_date.dropna()
        alpha_date = FactorPreProcess().remove_extreme_value_mad(alpha_date)
        alpha_date = FactorPreProcess().standardization(alpha_date)
        code_list = sorted(alpha_date.index)
        alpha_date = alpha_date.loc[code_list, :]

        # Restrict to stocks that also have a covariance estimate.
        if type == 'BarraStockCov':
            stock_cov = Barra().get_stock_covariance(date)
            code_list = sorted(set(alpha_date.index) & set(stock_cov.index))
            alpha_date = alpha_date.loc[code_list, :]
            stock_cov = stock_cov.loc[code_list, code_list]

        # Normalise again on the (possibly reduced) universe.
        alpha_date = FactorPreProcess().remove_extreme_value_mad(alpha_date)
        alpha_date = FactorPreProcess().standardization(alpha_date)

        if len(alpha_date) <= self.min_stock_num:
            # Skip explicitly so that no result from an earlier date (or an
            # unbound variable) is appended for this date.
            print("########## Not enough stocks for FMP At %s ##########"
                  % date)
            continue

        n_stock = len(alpha_date)
        P = np.eye(n_stock) if type == 'Equal' else stock_cov.values
        Q = np.zeros(shape=(n_stock, 1))
        # Row 0: alpha' w = 1; row 1: sum(w) = 0.
        A = np.vstack((alpha_date.values.T, np.ones(shape=(1, n_stock))))
        b = np.array([[1.0], [0.0]])

        try:
            result = sol.qp(matrix(P), q=matrix(Q), A=matrix(A), b=matrix(b))
            fmp_raw_alpha = pd.DataFrame(np.array(result['x'][0:]),
                                         columns=[date], index=code_list).T
            print("########## factor mimicking portfolio At %s ##########"
                  % date)
            concat_data = pd.concat([fmp_raw_alpha.T, alpha_date], axis=1)
            concat_data = concat_data.dropna()
            # Correlation between the FMP weights and the alpha factor
            # (element [0][0] is only the self-correlation).
            print(concat_data.corr().values[0][1])
        except Exception as e:
            # Best effort: keep an all-NaN row for this date and go on.
            fmp_raw_alpha = pd.DataFrame([], columns=[date],
                                         index=code_list).T
            print("########## Quadratic Programming FMP is InCorrect %s ##########"
                  % date)
            print(e)

        fmp_rows.append(fmp_raw_alpha)

    if not fmp_rows:
        print("########## No factor mimicking portfolio calculated ##########")
        return

    # Write one column per date.
    sub_path = os.path.join(self.path, 'fmp')
    os.makedirs(sub_path, exist_ok=True)
    file = os.path.join(
        sub_path,
        '%s_%s_%s.csv' % (self.alpha_factor_name, fmp_name, type))
    fmp_raw_alpha_all = pd.concat(fmp_rows, axis=0).T
    fmp_raw_alpha_all.to_csv(file)