def cal_factor_exposure(self, beg_date, end_date):
    """
    Compute the factor exposure and save it under ``self.raw_factor_name``.

    The ratio is::

        (TTM NetOperateCashFlow + TTM FinanceExpenseQuarter * (1 - TaxRate))
        / (TotalShareHoldeRequity + InterestDebt - CashEquivalents)

    The quarterly ratio is passed through
    ``Stock().change_quarter_to_daily_with_disclosure_date`` together with
    ``ReportDateDaily`` for the window [beg_date, end_date]; columns that are
    entirely NaN are dropped before saving.

    :param beg_date: start of the window handed to the quarter-to-daily helper
    :param end_date: end of the window handed to the quarter-to-daily helper
    """
    # read data
    cfo = Stock().read_factor_h5("NetOperateCashFlow")
    cfo_ttm = Stock().change_single_quarter_to_ttm_quarter(cfo)

    # NOTE(review): expense/equity/debt/cash are divided by 1e8 but the cash
    # flow is not -- presumably it is already stored in that unit; confirm.
    expense = Stock().read_factor_h5("FinanceExpenseQuarter")
    expense_ttm = Stock().change_single_quarter_to_ttm_quarter(expense) / 100000000.0

    tax_rate = Stock().read_factor_h5("TaxRate")
    # Forward-fill along the columns, at most 5 consecutive gaps.  ``ffill`` is
    # the non-deprecated spelling of ``fillna(method="pad")``.
    tax_rate = tax_rate.T.ffill(limit=5).T

    expense_ttm_adjust = expense_ttm.mul(1 - tax_rate)
    cfo_ttm_adjust = cfo_ttm.add(expense_ttm_adjust)

    holder = Stock().read_factor_h5("TotalShareHoldeRequity") / 100000000.0
    debt = Stock().read_factor_h5("InterestDebt") / 100000000.0
    cash = Stock().read_factor_h5("CashEquivalents") / 100000000.0
    operate_net_asset = holder + debt - cash

    cfroi = cfo_ttm_adjust.div(operate_net_asset)

    report_data = Stock().read_factor_h5("ReportDateDaily")
    cfroi = Stock().change_quarter_to_daily_with_disclosure_date(
        cfroi, report_data, beg_date, end_date)

    # drop columns that contain no data at all
    res = cfroi.T.dropna(how='all').T
    self.save_alpha_factor_exposure(res, self.raw_factor_name)
def cal_weight_date(self, date):
    """
    Build the market-value weights for one date.

    Market value is ``HK2CHoldShare * 100 * Price_Unadjust``. If ``date`` is
    not a column of that table, the date returned by
    ``Date().get_trade_date_offset(date, -1)`` is used instead.

    Rows are sorted by market value (descending). The result keeps the larger
    of (a) the rows whose cumulative weight is <= 0.90 and (b) the first 300
    rows, and the weights are then re-normalised over the kept rows.

    :param date: column label to look up in the market-value table
    :return: DataFrame with columns ``MarketValue``, ``Weight`` and
        ``WeightSum`` for the kept rows
    :raises KeyError: if neither ``date`` nor the fallback date is available
    """
    share_hk = Stock().read_factor_h5("HK2CHoldShare") * 100  # raw data is in units of 100 shares
    price_unadjust = Stock().read_factor_h5("Price_Unadjust")
    mv_hk = share_hk.mul(price_unadjust)
    mv_hk = mv_hk.T.dropna(how="all").T  # drop columns with no data at all

    try:
        mv_date = pd.DataFrame(mv_hk[date])
    except KeyError:
        # Only a missing column triggers the fallback; other errors propagate.
        date = Date().get_trade_date_offset(date, -1)
        mv_date = pd.DataFrame(mv_hk[date])

    mv_date = mv_date.dropna()
    mv_date = mv_date.sort_values(by=[date], ascending=False)
    mv_date.columns = ['MarketValue']
    mv_date['Weight'] = mv_date['MarketValue'] / mv_date['MarketValue'].sum()
    mv_date['WeightSum'] = mv_date['Weight'].cumsum()

    within_90 = mv_date[mv_date['WeightSum'] <= 0.90]
    keep = max(len(within_90), 300)
    # Copy the slice so the column assignments below act on an independent
    # frame rather than on a view of ``mv_date``.
    mv_date_filter = mv_date.iloc[0:keep, :].copy()
    mv_date_filter['Weight'] = mv_date_filter['MarketValue'] / mv_date_filter['MarketValue'].sum()
    mv_date_filter['WeightSum'] = mv_date_filter['Weight'].cumsum()

    return mv_date_filter
def cal_factor_barra_size(beg_date, end_date):
    """
    Factor description: natural log of total market value.

    Total market value is ``Price_Unadjust * TotalShare`` restricted to the
    columns between ``beg_date`` and ``end_date``. The raw log values are
    written as ``RAW_CNE5_SIZE``; after ``remove_extreme_value_mad`` and
    ``standardization_free_mv`` they are written as ``NORMAL_CNE5_SIZE``.

    :param beg_date: first date of the window (converted with ``Date().change_to_str``)
    :param end_date: last date of the window (converted with ``Date().change_to_str``)
    :return: the processed (second) log-size DataFrame
    """
    # param
    raw_factor_name = 'RAW_CNE5_SIZE'
    factor_name = 'NORMAL_CNE5_SIZE'
    beg_date = Date().change_to_str(beg_date)
    end_date = Date().change_to_str(end_date)

    # read data
    price_unadjust = Stock().get_factor_h5("Price_Unadjust", None, "primary_mfc")
    total_share = Stock().get_factor_h5("TotalShare", None, "primary_mfc")

    # Label-based column slice (``.ix`` no longer exists in pandas >= 1.0).
    price_unadjust = price_unadjust.loc[:, beg_date:end_date]
    total_share = total_share.loc[:, beg_date:end_date]

    # calculate data
    [price_unadjust, total_share] = FactorPreProcess().make_same_index_columns(
        [price_unadjust, total_share])
    # Multiply by the date-sliced, aligned share table -- not the unsliced one
    # that was read from disk.
    total_market_value = price_unadjust.mul(total_share)
    log_size_data = np.log(total_market_value)

    # save data
    Stock().write_factor_h5(log_size_data, raw_factor_name, 'barra_risk_dfc')
    log_size_data = FactorPreProcess().remove_extreme_value_mad(log_size_data)
    log_size_data = FactorPreProcess().standardization_free_mv(log_size_data)
    Stock().write_factor_h5(log_size_data, factor_name, 'barra_risk_dfc')

    return log_size_data
def PriceHighAdjust(beg_date, end_date):
    """
    Factor description: adjusted high price.

    Computes ``PriceHighUnadjust * AdjustFactor`` over the columns between
    ``beg_date`` and ``end_date`` and writes it as ``PriceHighAdjust``.

    :param beg_date: first date of the window (converted with ``Date().change_to_str``)
    :param end_date: last date of the window (converted with ``Date().change_to_str``)
    :return: DataFrame of adjusted high prices
    """
    # param
    factor_name = "PriceHighAdjust"
    beg_date = Date().change_to_str(beg_date)
    end_date = Date().change_to_str(end_date)

    # read data
    price_unadjust = Stock().get_factor_h5("PriceHighUnadjust", None, "primary_mfc")
    price_factor = Stock().get_factor_h5("AdjustFactor", None, "primary_mfc")

    # Label-based column slice (``.ix`` no longer exists in pandas >= 1.0).
    price_unadjust = price_unadjust.loc[:, beg_date:end_date]
    price_factor = price_factor.loc[:, beg_date:end_date]

    # calculate data
    [price_unadjust, price_factor] = FactorPreProcess().make_same_index_columns(
        [price_unadjust, price_factor])
    price_adjust = price_unadjust.mul(price_factor)

    # save data
    Stock().write_factor_h5(price_adjust, factor_name, "alpha_dfc")
    return price_adjust
def cal_factor_exposure(self, beg_date, end_date):
    """
    Compute the factor exposure: TTM net operating cash flow over total
    market value, saved under ``self.raw_factor_name``.

    :param beg_date: start of the window handed to the quarter-to-daily helper
    :param end_date: end of the window handed to the quarter-to-daily helper
    """
    # load inputs
    quarterly_cfo = Stock().read_factor_h5("NetOperateCashFlow")
    ttm_cfo = Stock().change_single_quarter_to_ttm_quarter(quarterly_cfo)
    shares = Stock().read_factor_h5("TotalShare")
    raw_price = Stock().read_factor_h5("Price_Unadjust")
    disclosure = Stock().read_factor_h5("ReportDateDaily")

    # quarterly -> daily using the disclosure-date table
    daily_cfo = Stock().change_quarter_to_daily_with_disclosure_date(
        ttm_cfo, disclosure, beg_date, end_date)

    # market value, scaled by 1e8 (presumably to match the cash-flow unit -- confirm)
    shares, raw_price = FactorPreProcess().make_same_index_columns([shares, raw_price])
    market_cap = (shares * raw_price) / 100000000

    daily_cfo, market_cap = Stock().make_same_index_columns([daily_cfo, market_cap])
    ratio = daily_cfo / market_cap

    # drop columns that are entirely NaN, then persist
    by_date = ratio.T
    by_date = by_date.dropna(how='all')
    self.save_alpha_factor_exposure(by_date.T, self.raw_factor_name)
def TotalMarketValue(beg_date, end_date):
    """
    Compute total market value = total shares * unadjusted price.

    Uses the columns between ``beg_date`` and ``end_date`` of ``TotalShare``
    and ``Price_Unadjust`` and writes the product as ``TotalMarketValue``.

    :param beg_date: first date of the window (converted with ``Date().change_to_str``)
    :param end_date: last date of the window (converted with ``Date().change_to_str``)
    :return: DataFrame of total market values (no unit rescaling is applied)
    """
    # param
    factor_name = "TotalMarketValue"
    beg_date = Date().change_to_str(beg_date)
    end_date = Date().change_to_str(end_date)

    # read data
    price_unadjust = Stock().get_factor_h5("Price_Unadjust", None, "primary_mfc")
    total_share = Stock().get_factor_h5("TotalShare", None, "primary_mfc")

    # Label-based column slice (``.ix`` no longer exists in pandas >= 1.0).
    price_unadjust = price_unadjust.loc[:, beg_date:end_date]
    total_share = total_share.loc[:, beg_date:end_date]

    # calculate data
    [price_unadjust, total_share] = FactorPreProcess().make_same_index_columns(
        [price_unadjust, total_share])
    total_market_value = price_unadjust.mul(total_share)

    # save data
    Stock().write_factor_h5(total_market_value, factor_name, "alpha_dfc")
    return total_market_value
def cal_factor_exposure(self, beg_date, end_date):
    """
    Compute the factor exposure: rolling inflow relative to free-float
    market value, saved under ``self.raw_factor_name``.

    For each date in the window, ``Mf_Inflow`` and the free-float market value
    (``Price_Unadjust * Free_FloatShare``) are each summed over the rows from
    ``Date().get_trade_date_offset(date, -(term - 1))`` to that date
    (``term`` = 10). The exposure is ``100 * sum(inflow) / sum(free_mv)``;
    rows whose summed market value is 0 are omitted. A date with fewer than
    ``int(0.8 * term)`` rows of data gets an empty column.

    :param beg_date: first date passed to ``Date().get_trade_date_series``
    :param end_date: last date passed to ``Date().get_trade_date_series``
    """
    # param
    term = 10
    effective_term = int(0.8 * term)

    # read data
    inflow = Stock().read_factor_h5("Mf_Inflow")
    price_unadjust = Stock().read_factor_h5("Price_Unadjust")
    free_share = Stock().read_factor_h5("Free_FloatShare")

    # calculate data
    [price_unadjust, free_share] = Stock().make_same_index_columns([price_unadjust, free_share])
    free_mv = price_unadjust.mul(free_share)
    [inflow, free_mv] = Stock().make_same_index_columns([inflow, free_mv])

    # transpose so rows can be sliced by date label
    inflow = inflow.T
    free_mv = free_mv.T

    # only dates that are both in the requested series and present in the data
    date_series = sorted(set(Date().get_trade_date_series(beg_date, end_date)) & set(inflow.index))

    # Collect one single-column frame per date and concatenate once at the end;
    # concatenating inside the loop re-copies the accumulated result each time.
    daily_frames = []
    for current_date in date_series:
        data_beg_date = Date().get_trade_date_offset(current_date, -(term - 1))
        inflow_pre = inflow.loc[data_beg_date:current_date, :]
        free_mv_pre = free_mv.loc[data_beg_date:current_date, :]

        if len(inflow_pre) >= effective_term:
            print('Calculating factor %s at date %s' % (self.raw_factor_name, current_date))
            date_data = pd.concat([inflow_pre.sum(), free_mv_pre.sum()], axis=1)
            date_data.columns = ['inflow', 'free_mv']
            # skip zero market value to avoid dividing by zero
            date_data = date_data[date_data['free_mv'] != 0.0]
            ratio = date_data['inflow'] / date_data['free_mv']
            date_data = (ratio * 100).to_frame(current_date)
        else:
            print('Calculating factor %s at date %s is null' % (self.raw_factor_name, current_date))
            date_data = pd.DataFrame([], columns=[current_date], index=free_mv.columns)

        daily_frames.append(date_data)

    # pd.concat rejects an empty list, so fall back to an empty frame
    res = pd.concat(daily_frames, axis=1) if daily_frames else pd.DataFrame()

    # drop columns that contain no data at all
    res = res.T.dropna(how='all').T
    self.save_alpha_factor_exposure(res, self.raw_factor_name)
def cal_factor_exposure(self, beg_date, end_date):
    """
    Compute the factor exposure: ``AdvanceReceipts`` over total market value,
    saved under ``self.raw_factor_name``.

    :param beg_date: start of the window handed to the quarter-to-daily helper
    :param end_date: end of the window handed to the quarter-to-daily helper
    """
    # load inputs
    receipts = Stock().read_factor_h5("AdvanceReceipts")
    shares = Stock().read_factor_h5("TotalShare")
    raw_price = Stock().read_factor_h5("Price_Unadjust")
    disclosure = Stock().read_factor_h5("ReportDateDaily")

    # quarterly -> daily using the disclosure-date table
    receipts = Stock().change_quarter_to_daily_with_disclosure_date(
        receipts, disclosure, beg_date, end_date)

    # total market value (no unit rescaling in this factor)
    shares, raw_price = Stock().make_same_index_columns([shares, raw_price])
    market_cap = shares * raw_price

    receipts, market_cap = Stock().make_same_index_columns([receipts, market_cap])
    ratio = receipts / market_cap

    # drop columns that are entirely NaN, then persist
    by_date = ratio.T.dropna(how='all')
    self.save_alpha_factor_exposure(by_date.T, self.raw_factor_name)
def cal_factor_exposure(self):
    """
    Compute the factor exposure: natural log of total market value.

    The raw log values are saved under ``self.raw_factor_name``; the values
    after ``remove_extreme_value_mad`` and ``standardization`` are saved under
    ``self.factor_name``.
    """
    # load inputs
    raw_price = Stock().read_factor_h5("Price_Unadjust")
    shares = Stock().read_factor_h5("TotalShare")

    # market value scaled by 1e8, then log
    raw_price, shares = FactorPreProcess().make_same_index_columns([raw_price, shares])
    market_cap = (raw_price * shares) / 100000000
    raw_size = np.log(market_cap)

    # persist the raw factor first, then the processed one
    self.save_risk_factor_exposure(raw_size, self.raw_factor_name)
    trimmed = FactorPreProcess().remove_extreme_value_mad(raw_size)
    normalized = FactorPreProcess().standardization(trimmed)
    self.save_risk_factor_exposure(normalized, self.factor_name)
def cal_factor_exposure(self, beg_date, end_date):
    """
    Compute the factor exposure: ``TotalShareHoldeRequity`` over total market
    value, saved under ``self.raw_factor_name``.

    :param beg_date: start of the window handed to the quarter-to-daily helper
    :param end_date: end of the window handed to the quarter-to-daily helper
    """
    # load inputs
    equity = Stock().read_factor_h5("TotalShareHoldeRequity")
    shares = Stock().read_factor_h5("TotalShare")
    raw_price = Stock().read_factor_h5("Price_Unadjust")
    disclosure = Stock().read_factor_h5("ReportDateDaily")

    # quarterly -> daily using the disclosure-date table
    equity = Stock().change_quarter_to_daily_with_disclosure_date(
        equity, disclosure, beg_date, end_date)

    # total market value (no unit rescaling in this factor)
    shares, raw_price = FactorPreProcess().make_same_index_columns([shares, raw_price])
    market_cap = shares * raw_price

    equity, market_cap = Stock().make_same_index_columns([equity, market_cap])
    book_to_price = equity / market_cap

    # drop columns that are entirely NaN, then persist
    by_date = book_to_price.T.dropna(how='all')
    self.save_alpha_factor_exposure(by_date.T, self.raw_factor_name)
def cal_factor_exposure(self, beg_date, end_date):
    """
    Compute the factor exposure: 4 * ``CashEquivalents`` / total market value,
    saved under ``self.raw_factor_name``.

    :param beg_date: start of the window handed to the quarter-to-daily helper
    :param end_date: end of the window handed to the quarter-to-daily helper
    """
    # load inputs
    cash_equiv = Stock().read_factor_h5("CashEquivalents")
    shares = Stock().read_factor_h5("TotalShare")
    raw_price = Stock().read_factor_h5("Price_Unadjust")
    disclosure = Stock().read_factor_h5("ReportDateDaily")

    # quarterly -> daily using the disclosure-date table
    cash_equiv = Stock().change_quarter_to_daily_with_disclosure_date(
        cash_equiv, disclosure, beg_date, end_date)

    # total market value (no unit rescaling in this factor)
    shares, raw_price = FactorPreProcess().make_same_index_columns([shares, raw_price])
    market_cap = shares * raw_price

    cash_equiv, market_cap = Stock().make_same_index_columns([cash_equiv, market_cap])
    # NOTE(review): the factor of 4 looks like an annualisation of a quarterly
    # figure -- confirm it is intended for this input.
    cash_to_price = 4 * (cash_equiv / market_cap)

    # drop columns that are entirely NaN, then persist
    by_date = cash_to_price.T.dropna(how='all')
    self.save_alpha_factor_exposure(by_date.T, self.raw_factor_name)
def cal_factor_exposure(self, beg_date, end_date):
    """
    Compute the factor exposure: TTM ``OperatingIncome`` over total market
    value, saved under ``self.raw_factor_name``.

    :param beg_date: start of the window handed to the quarter-to-daily helper
    :param end_date: end of the window handed to the quarter-to-daily helper
    """
    # load inputs
    shares = Stock().read_factor_h5("TotalShare")
    raw_price = Stock().read_factor_h5("Price_Unadjust")
    quarterly_income = Stock().read_factor_h5("OperatingIncome")
    ttm_income = Stock().change_single_quarter_to_ttm_quarter(quarterly_income)
    disclosure = Stock().read_factor_h5("ReportDateDaily")

    # quarterly -> daily using the disclosure-date table
    daily_income = Stock().change_quarter_to_daily_with_disclosure_date(
        ttm_income, disclosure, beg_date, end_date)

    # market value, scaled by 1e8 (presumably to match the income unit -- confirm)
    shares, raw_price = FactorPreProcess().make_same_index_columns([shares, raw_price])
    market_cap = (shares * raw_price) / 100000000

    daily_income, market_cap = Stock().make_same_index_columns([daily_income, market_cap])
    sales_to_price = daily_income / market_cap

    # drop columns that are entirely NaN, then persist
    by_date = sales_to_price.T.dropna(how='all')
    self.save_alpha_factor_exposure(by_date.T, self.raw_factor_name)
def cal_cash_earnings_to_price_ratio(self, beg_date, end_date):
    """
    Net operating cash flow (TTM) / total market value.

    The raw ratio is saved under ``self.raw_factor_name_cash``; the values
    after ``remove_extreme_value_mad`` and ``standardization`` are saved under
    ``self.factor_name_cash``.

    :param beg_date: start of the window handed to the quarter-to-daily helper
    :param end_date: end of the window handed to the quarter-to-daily helper
    """
    # cash-flow leg: single quarter -> TTM -> daily
    quarterly_cf = Stock().read_factor_h5("NetOperateCashFlow")
    disclosure = Stock().read_factor_h5("ReportDateDaily")
    ttm_cf = Stock().change_single_quarter_to_ttm_quarter(quarterly_cf)
    daily_cf = Stock().change_quarter_to_daily_with_disclosure_date(
        ttm_cf, disclosure, beg_date, end_date)

    # market-value leg
    shares = Stock().read_factor_h5("TotalShare")
    raw_price = Stock().read_factor_h5("Price_Unadjust")

    # align all three tables at once
    daily_cf, shares, raw_price = FactorPreProcess().make_same_index_columns(
        [daily_cf, shares, raw_price])

    market_cap = (shares * raw_price) / 100000000
    ratio = daily_cf / market_cap

    # drop columns that are entirely NaN
    ratio = ratio.T.dropna(how='all').T

    # persist the raw factor first, then the processed one
    self.save_risk_factor_exposure(ratio, self.raw_factor_name_cash)
    trimmed = FactorPreProcess().remove_extreme_value_mad(ratio)
    normalized = FactorPreProcess().standardization(trimmed)
    self.save_risk_factor_exposure(normalized, self.factor_name_cash)
def cal_factor_exposure(self, beg_date, end_date):
    """
    Compute the factor exposure:
    4 * (``OperatingIncome`` - ``OperatingCost``) / total market value,
    saved under ``self.raw_factor_name``.

    :param beg_date: start of the window handed to the quarter-to-daily helper
    :param end_date: end of the window handed to the quarter-to-daily helper
    """
    # load inputs
    revenue = Stock().read_factor_h5("OperatingIncome")
    operating_cost = Stock().read_factor_h5("OperatingCost")
    shares = Stock().read_factor_h5("TotalShare")
    raw_price = Stock().read_factor_h5("Price_Unadjust")
    disclosure = Stock().read_factor_h5("ReportDateDaily")

    gross_profit = revenue - operating_cost

    # quarterly -> daily using the disclosure-date table
    gross_profit = Stock().change_quarter_to_daily_with_disclosure_date(
        gross_profit, disclosure, beg_date, end_date)

    # market value, scaled by 1e8 (presumably to match the profit unit -- confirm)
    shares, raw_price = FactorPreProcess().make_same_index_columns([shares, raw_price])
    market_cap = (shares * raw_price) / 100000000

    gross_profit, market_cap = Stock().make_same_index_columns([gross_profit, market_cap])
    # the factor of 4 presumably annualises a single-quarter figure -- confirm
    profit_yield = 4 * (gross_profit / market_cap)

    # drop columns that are entirely NaN, then persist
    by_date = profit_yield.T.dropna(how='all')
    self.save_alpha_factor_exposure(by_date.T, self.raw_factor_name)
def cal_factor_exposure(self):
    """
    Compute the factor exposure: ``TotalShareHoldeRequityDaily`` over total
    market value.

    The raw ratio is saved under ``self.raw_factor_name``; the values after
    ``remove_extreme_value_mad`` and ``standardization`` are saved under
    ``self.factor_name``.
    """
    # load inputs
    equity_daily = Stock().read_factor_h5("TotalShareHoldeRequityDaily")
    shares = Stock().read_factor_h5("TotalShare")
    raw_price = Stock().read_factor_h5("Price_Unadjust")

    # total market value (no unit rescaling in this factor)
    shares, raw_price = FactorPreProcess().make_same_index_columns([shares, raw_price])
    market_cap = shares * raw_price

    equity_daily, market_cap = Stock().make_same_index_columns([equity_daily, market_cap])
    ratio = equity_daily / market_cap

    # drop columns that are entirely NaN
    raw_exposure = ratio.T.dropna(how='all').T

    # persist the raw factor first, then the processed one
    self.save_risk_factor_exposure(raw_exposure, self.raw_factor_name)
    trimmed = FactorPreProcess().remove_extreme_value_mad(raw_exposure)
    normalized = FactorPreProcess().standardization(trimmed)
    self.save_risk_factor_exposure(normalized, self.factor_name)
def cal_factor_exposure(self, beg_date, end_date):
    """
    Compute the factor exposure: 4 * ``RetainedEarnings`` / total market
    value, saved under ``self.raw_factor_name``.

    :param beg_date: start of the window handed to the quarter-to-daily helper
    :param end_date: end of the window handed to the quarter-to-daily helper
    """
    # load inputs
    retained = Stock().read_factor_h5("RetainedEarnings")
    shares = Stock().read_factor_h5("TotalShare")
    raw_price = Stock().read_factor_h5("Price_Unadjust")
    disclosure = Stock().read_factor_h5("ReportDateDaily")

    # quarterly -> daily using the disclosure-date table
    retained = Stock().change_quarter_to_daily_with_disclosure_date(
        retained, disclosure, beg_date, end_date)

    # market value, scaled by 1e8 (presumably to match the earnings unit -- confirm)
    shares, raw_price = FactorPreProcess().make_same_index_columns([shares, raw_price])
    market_cap = (shares * raw_price) / 100000000

    retained, market_cap = Stock().make_same_index_columns([retained, market_cap])
    # NOTE(review): the factor of 4 looks like an annualisation -- confirm it
    # is intended for this input.
    retained_to_price = 4 * (retained / market_cap)

    # drop columns that are entirely NaN, then persist
    by_date = retained_to_price.T.dropna(how='all')
    self.save_alpha_factor_exposure(by_date.T, self.raw_factor_name)
def cal_smb_factor_pct(self):
    """
    Compute the SMB return series (three size groups) and write it to CSV.

    For each pair of consecutive dates, stocks are ranked by the previous
    date's total market value. The value stored for the current date is the
    mean ``Pct_chg`` of the smallest third minus the mean ``Pct_chg`` of the
    largest third. A running sum is added as ``CumSumReturn`` and the table is
    written to ``<self.data_path>/factor_return/FactorReturn_SMB.csv``.
    """
    name = "SMB"

    # load inputs
    share_all = Stock().read_factor_h5("TotalShare")
    price_close = Stock().read_factor_h5("PriceCloseUnadjust")
    stock_pct = Stock().read_factor_h5("Pct_chg")
    total_mv = (price_close * share_all) / 100000000

    # dates available in both the return table and the market-value table
    common_dates = sorted(set(stock_pct.columns) & set(total_mv.columns))
    result = pd.DataFrame([], index=common_dates, columns=[name])

    # walk consecutive (previous, current) date pairs
    for prev_date, cur_date in zip(common_dates[:-1], common_dates[1:]):
        pair = pd.concat([total_mv[prev_date], stock_pct[cur_date]], axis=1)
        pair = pair.dropna()
        pair.columns = ['LastMv', 'Return']

        # smallest third by previous-day market value
        by_size_asc = pair.sort_values(by=['LastMv'])
        third = int(len(by_size_asc) / 3)
        small_mean = by_size_asc.loc[by_size_asc.index[0:third], 'Return'].mean()

        # largest third by previous-day market value
        by_size_desc = by_size_asc.sort_values(by=['LastMv'], ascending=False)
        third = int(len(by_size_desc) / 3)
        big_mean = by_size_desc.loc[by_size_desc.index[0:third], 'Return'].mean()

        result.loc[cur_date, name] = small_mean - big_mean

    # drop dates without a value, accumulate, and persist
    result = result.dropna()
    result['CumSumReturn'] = result[name].cumsum()
    out_file = os.path.join(self.data_path, 'factor_return', 'FactorReturn_%s.csv' % name)
    result.to_csv(out_file)