def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the APM ``stat`` statistic of one stock for one calculation date.

    For each of the most recent ``cls.__days`` trading days on which minute
    data is available, the morning return (09:31 open to 11:30 close) and the
    afternoon return (11:30 close to 15:00 close) are computed for the stock
    and for the index identified by ``factor_ct.APM_CT.index_code``. The
    stacked stock returns are regressed on the stacked index returns (with an
    intercept) and the statistic is derived from the difference between the
    morning and the afternoon residuals.

    Parameters
    --------
    :param code: str
        Stock code, e.g. 600000
    :param calc_date: datetime-like, str
        Calculation date of the factor loading, format YYYY-MM-DD
    :return: float
    --------
        The stat statistic, an intermediate value of the APM factor loading.
        None is returned when fewer than ``cls.__days`` usable days are found
        or when the standard deviation of the residual differences is
        (almost) zero.
    """
    def _session_returns(df_1min, str_date):
        # Morning and afternoon simple returns taken from one day's 1-minute bars.
        bar_time = df_1min.datetime
        day_open = df_1min[bar_time == '%s 09:31:00' % str_date].iloc[0].open
        mid_close = df_1min[bar_time == '%s 11:30:00' % str_date].iloc[0].close
        day_close = df_1min[bar_time == '%s 15:00:00' % str_date].iloc[0].close
        return mid_close / day_open - 1.0, day_close / mid_close - 1.0

    # 1. Candidate trading days up to calc_date (requested with ndays=40, descending order).
    calc_date = Utils.to_date(calc_date)
    trading_days = Utils.get_trading_days(end=calc_date, ndays=40, ascending=False)

    # 2. Walk the trading days and fill the morning / afternoon return arrays of the
    #    stock (r_*) and of the index (R_*). Row k of every array refers to the same day.
    #    An earlier variant measured the morning return from the previous day's close;
    #    this one measures it from the 09:31 open of the same day.
    n_days = cls.__days
    symbol = Utils.code_to_symbol(code)
    index_code = factor_ct.APM_CT.index_code
    r_am_array = np.zeros((n_days, 1))
    r_pm_array = np.zeros((n_days, 1))
    R_am_array = np.zeros((n_days, 1))
    R_pm_array = np.zeros((n_days, 1))
    k = 0
    for trading_day in trading_days:
        secu_min_data = Utils.get_min_mkt(symbol, trading_day, fq=True)
        if secu_min_data is None:
            continue
        index_min_data = Utils.get_min_mkt(index_code, trading_day, index=True, fq=True)
        if index_min_data is None:
            # Without index data the day cannot enter the regression; skip it for
            # both series so that the stock and index rows stay aligned.
            continue
        str_date = Utils.datetimelike_to_str(trading_day)
        r_am_array[k, 0], r_pm_array[k, 0] = _session_returns(secu_min_data, str_date)
        R_am_array[k, 0], R_pm_array[k, 0] = _session_returns(index_min_data, str_date)
        k += 1
        if k == n_days:
            break
    if k < n_days:
        # Not enough days with data inside the candidate window.
        return None

    # 3. Stack the returns: the upper half holds the morning values, the lower half
    #    the afternoon values.
    r_apm_array = np.concatenate((r_am_array, r_pm_array), axis=0)
    R_apm_array = np.concatenate((R_am_array, R_pm_array), axis=0)

    # 4. Regress the stock returns on the index returns:
    #    r_i = \alpha + \beta * R_i + \epsilon_i
    R_apm_array = sm.add_constant(R_apm_array)
    stat_result = sm.OLS(r_apm_array, R_apm_array).fit()
    resid_array = stat_result.resid.reshape((n_days * 2, 1))

    # 5. stat statistic. The first n_days residuals belong to the morning, the last
    #    n_days to the afternoon; delta_t is their per-day difference and
    #    stat = mean(delta_t) / std(delta_t) / sqrt(n_days).
    #    A stat further above (below) zero means the stock behaved better (worse)
    #    in the morning than in the afternoon once the index effect is removed.
    #    NOTE(review): a t-statistic would be mean / (std / sqrt(N)); the code divides
    #    by sqrt(N) instead - confirm this scaling is intended.
    delta_array = resid_array[:n_days] - resid_array[n_days:]
    delta_avg = np.mean(delta_array)
    delta_std = np.std(delta_array)
    # A (nearly) zero standard deviation would make the ratio meaningless.
    if np.fabs(delta_std) < 0.0001:
        return None
    stat = delta_avg / delta_std / np.sqrt(n_days)
    return stat
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the intraday momentum values of one stock for one calculation date.

    Parameters
    --------
    :param code: str
        Stock code, e.g. 600000 or SH600000
    :param calc_date: datetime-like, str
        Calculation date of the factor loading, format YYYY-MM-DD
    :return: pd.Series
    --------
        Momentum values, indexed as follows:
        0. m0: overnight momentum (previous 15:00 close to 09:31 open)
        1. m1: first-hour momentum (09:31 open to 10:30 close)
        2. m2: second-hour momentum (10:30 close to 11:30 close)
        3. m3: third-hour momentum (11:30 close to 14:00 close)
        4. m4: fourth-hour momentum (14:00 close to 15:00 close)
        5. m_normal: conventional momentum (log return between the oldest and
           the most recent 15:00 close of the window)
        None is returned when fewer than ``cls.__days`` + 1 days with minute
        data are found.
    """
    def _bar(df_1min, time_label):
        # First 1-minute bar whose datetime equals time_label.
        return df_1min[df_1min.datetime == time_label].iloc[0]

    # Normalise the date the same way the sibling factor calculation does, then
    # request the candidate trading days (ndays=90, descending order).
    calc_date = Utils.to_date(calc_date)
    trading_days = Utils.get_trading_days(end=calc_date, ndays=90, ascending=False)

    # Collect, most recent day first, the five intraday price points of the latest
    # cls.__days + 1 days that have minute data. Rows are kept in plain lists and the
    # DataFrame is built once afterwards: DataFrame.append no longer exists in
    # pandas >= 2.0 and copied the whole frame on every call.
    n_days = cls.__days
    ret_header = ['date', 'r0', 'r1', 'r2', 'r3', 'r4']
    price_rows = []
    ret_rows = []
    for trading_day in trading_days:
        df_1m_mkt = Utils.get_min_mkt(code, trading_day, fq=True)
        if df_1m_mkt is None:
            continue
        str_date = trading_day.strftime('%Y-%m-%d')
        prices = {
            'date': trading_day,
            'p0930': _bar(df_1m_mkt, '%s 09:31:00' % str_date).open,
            'p1030': _bar(df_1m_mkt, '%s 10:30:00' % str_date).close,
            'p1130': _bar(df_1m_mkt, '%s 11:30:00' % str_date).close,
            'p1400': _bar(df_1m_mkt, '%s 14:00:00' % str_date).close,
            'p1500': _bar(df_1m_mkt, '%s 15:00:00' % str_date).close,
        }
        if price_rows:
            # Days arrive in descending order, so the row stored last is the
            # trading day that follows the current one. Its overnight return needs
            # the current day's close; its hourly returns only need its own prices.
            newer = price_rows[-1]
            ret_rows.append({
                'date': newer['date'],
                'r0': math.log(newer['p0930'] / prices['p1500']),
                'r1': math.log(newer['p1030'] / newer['p0930']),
                'r2': math.log(newer['p1130'] / newer['p1030']),
                'r3': math.log(newer['p1400'] / newer['p1130']),
                'r4': math.log(newer['p1500'] / newer['p1400']),
            })
        price_rows.append(prices)
        if len(price_rows) > n_days:
            break
    if len(price_rows) <= n_days:
        # Not enough days with minute data inside the candidate window.
        return None

    # Conventional momentum: log return from the oldest to the most recent close.
    m_normal = math.log(price_rows[0]['p1500'] / price_rows[-1]['p1500'])

    # Each intraday momentum value is the sum of the matching log return over the window.
    intra_day_ret = DataFrame(ret_rows, columns=ret_header)
    intra_day_ret = intra_day_ret.sort_values(by='date')
    intra_day_ret = intra_day_ret.set_index('date')
    intra_day_momentum = intra_day_ret.sum()
    intra_day_momentum.index = ['m0', 'm1', 'm2', 'm3', 'm4']
    intra_day_momentum['m_normal'] = m_normal
    return intra_day_momentum