Пример #1
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the APM factor's "stat" statistic for one stock on one date.

        Up to 40 trading days ending at ``calc_date`` are requested in
        descending order. The first ``cls.__days`` of them for which both the
        stock and the benchmark index have 1-minute data are used; days
        without data are skipped.
        --------
        :param code: string
            Stock code, e.g. 600000
        :param calc_date: datetime-like, str
            Factor loading calculation date, format YYYY-MM-DD
        :return: float
        --------
            The stat statistic, an intermediate value used to build the APM
            factor loading. None is returned when fewer than ``cls.__days``
            usable days are found, or when the standard deviation of the
            morning-minus-afternoon residual differences is below 0.0001.
        :raises IndexError:
            If a day's minute data lacks the 09:31, 11:30 or 15:00 bar.
        """

        def _session_returns(df_1min, str_date):
            # Morning return: 11:30 close over 09:31 open.
            # Afternoon return: 15:00 close over 11:30 close.
            bar_time = df_1min.datetime
            p_open = df_1min[bar_time == '%s 09:31:00' % str_date].iloc[0].open
            p_mid = df_1min[bar_time == '%s 11:30:00' % str_date].iloc[0].close
            p_close = df_1min[bar_time == '%s 15:00:00' % str_date].iloc[0].close
            return p_mid / p_open - 1.0, p_close / p_mid - 1.0

        n_days = cls.__days

        # 1. Candidate trading days, most recent first.
        calc_date = Utils.to_date(calc_date)
        trading_days = Utils.get_trading_days(end=calc_date,
                                              ndays=40,
                                              ascending=False)

        # 2. Walk the candidate days and collect morning/afternoon returns of
        #    the stock (r_*) and of the index (R_*). Row k holds the k-th most
        #    recent usable day.
        r_am_array = np.zeros((n_days, 1))
        r_pm_array = np.zeros((n_days, 1))
        R_am_array = np.zeros((n_days, 1))
        R_pm_array = np.zeros((n_days, 1))
        symbol = Utils.code_to_symbol(code)
        k = 0
        for trading_day in trading_days:
            df_secu = Utils.get_min_mkt(symbol, trading_day, fq=True)
            if df_secu is None:
                continue
            df_index = Utils.get_min_mkt(factor_ct.APM_CT.index_code,
                                         trading_day,
                                         index=True,
                                         fq=True)
            if df_index is None:
                # Without index data the day cannot enter the regression;
                # skip it the same way a day without stock data is skipped.
                continue

            str_date = Utils.datetimelike_to_str(trading_day)
            r_am_array[k, 0], r_pm_array[k, 0] = _session_returns(df_secu, str_date)
            R_am_array[k, 0], R_pm_array[k, 0] = _session_returns(df_index, str_date)

            k += 1
            if k == n_days:
                break
        if k < n_days:
            return None

        # 3. Stack the returns: morning observations first, afternoon second.
        r_apm_array = np.concatenate((r_am_array, r_pm_array), axis=0)
        R_apm_array = np.concatenate((R_am_array, R_pm_array), axis=0)

        # 4. Regress the stock returns on the index returns with a constant:
        #    r_i = alpha + beta * R_i + epsilon_i
        R_apm_array = sm.add_constant(R_apm_array)
        stat_result = sm.OLS(r_apm_array, R_apm_array).fit()
        resid_array = stat_result.resid.reshape((n_days * 2, 1))

        # 5. Per-day difference between the morning residual and the afternoon
        #    residual, then stat = mean(delta) / std(delta) / sqrt(N), N = n_days.
        #    A value further above (below) zero means the stock did better
        #    (worse) in the morning than in the afternoon, net of the market.
        # NOTE(review): a conventional t-statistic multiplies by sqrt(N); this
        # divides. For a fixed N that only rescales the value - confirm intent.
        delta_array = resid_array[:n_days] - resid_array[n_days:]
        delta_avg = np.mean(delta_array)
        delta_std = np.std(delta_array)
        # A near-zero spread would make the ratio meaningless.
        if np.fabs(delta_std) < 0.0001:
            return None
        stat = delta_avg / delta_std / np.sqrt(n_days)
        return stat
Пример #2
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the intraday momentum values of one stock for one date.

        Up to 90 trading days ending at ``calc_date`` are requested in
        descending order. The first ``cls.__days + 1`` of them that have
        1-minute data are used, giving ``cls.__days`` days of returns.
        Parameters
        --------
        :param code: str
            Stock code, e.g. 600000 or SH600000
        :param calc_date: datetime-like, str
            Factor loading calculation date, format YYYY-MM-DD
        :return: pd.Series
        --------
            Momentum per intraday period, indexed as follows:
            0. m0: overnight momentum (09:31 open vs. previous day's 15:00 close)
            1. m1: first-hour momentum (09:31 open -> 10:30 close)
            2. m2: second-hour momentum (10:30 close -> 11:30 close)
            3. m3: third-hour momentum (11:30 close -> 14:00 close)
            4. m4: fourth-hour momentum (14:00 close -> 15:00 close)
            5. m_normal: conventional momentum (log return of the 15:00 close
               over the whole window)
            None is returned when fewer than ``cls.__days + 1`` days with
            minute data are found.
        :raises IndexError:
            If a day's minute data lacks one of the five required bars.
        """
        n_days = cls.__days

        # Candidate trading days, most recent first.
        trading_days = Utils.get_trading_days(end=calc_date,
                                              ndays=90,
                                              ascending=False)

        # Five reference prices per usable day, newest day first. Rows are
        # gathered in a plain list: DataFrame.append was removed in pandas 2.0
        # and copied the whole frame on every call.
        day_prices = []
        for trading_day in trading_days:
            df_1m_mkt = Utils.get_min_mkt(code, trading_day, fq=True)
            if df_1m_mkt is None:
                continue
            str_date = trading_day.strftime('%Y-%m-%d')
            bar_time = df_1m_mkt.datetime
            p0930 = df_1m_mkt[bar_time == '%s 09:31:00' % str_date].iloc[0].open
            p1030 = df_1m_mkt[bar_time == '%s 10:30:00' % str_date].iloc[0].close
            p1130 = df_1m_mkt[bar_time == '%s 11:30:00' % str_date].iloc[0].close
            p1400 = df_1m_mkt[bar_time == '%s 14:00:00' % str_date].iloc[0].close
            p1500 = df_1m_mkt[bar_time == '%s 15:00:00' % str_date].iloc[0].close
            day_prices.append((p0930, p1030, p1130, p1400, p1500))
            # One extra day is needed as the base for the oldest overnight return.
            if len(day_prices) > n_days:
                break
        if len(day_prices) <= n_days:
            return None

        # Log returns of each day against its own bars and, for the overnight
        # leg, against the previous usable day's close.
        ret_rows = []
        for today, prev_day in zip(day_prices, day_prices[1:]):
            p0930, p1030, p1130, p1400, p1500 = today
            prev_close = prev_day[4]
            ret_rows.append((
                math.log(p0930 / prev_close),
                math.log(p1030 / p0930),
                math.log(p1130 / p1030),
                math.log(p1400 / p1130),
                math.log(p1500 / p1400),
            ))

        # Conventional momentum: newest close over the oldest close collected.
        m_normal = math.log(day_prices[0][4] / day_prices[-1][4])

        # Each intraday momentum is the sum of its period's returns over the
        # window; rows are put oldest-first before summing.
        ret_rows.reverse()
        intra_day_momentum = DataFrame(
            ret_rows, columns=['m0', 'm1', 'm2', 'm3', 'm4']).sum()
        intra_day_momentum['m_normal'] = m_normal
        return intra_day_momentum