Пример #1
0
    def get_wind_file(self):
        """Write one Wind-style portfolio weight CSV per selected trade date.

        The regression exposure of ``self.fund_index_code`` is loaded for
        ``self.port_name``; every trade date returned for period ``"W"`` that
        is also present in the exposure data produces a file
        ``<wind_port_path>/<port_name>/<port_name>_<next trade date>.csv``
        holding the exposure as a ``Weight`` column plus constant
        ``CreditTrading``, ``Date``, ``Price`` and ``Direction`` columns.
        """

        exposure = FundRegressionExposure(
            self.port_name).get_fund_regression_exposure(self.fund_index_code)
        # Remove rows that are entirely empty, then transpose so that the
        # columns are the labels matched against the trade dates below.
        exposure = exposure.dropna(how='all').T

        first_date, last_date = exposure.columns[0], exposure.columns[-1]
        trade_dates = Date().get_trade_date_series(first_date, last_date, "W")
        trade_dates = sorted(set(trade_dates) & set(exposure.columns))

        out_dir = os.path.join(self.wind_port_path, self.port_name)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        for date in trade_dates:
            print("Generate File %s" % date)
            weights = pd.DataFrame(exposure[date])
            # The file is stamped with the trade date following the data date.
            next_date = Date().get_trade_date_offset(date, 1)

            weights.columns = ['Weight']
            weights.index.name = 'Code'
            weights["CreditTrading"] = "No"
            weights["Date"] = next_date
            weights["Price"] = 0.0
            weights["Direction"] = "Long"

            csv_name = '%s_%s.csv' % (self.port_name, next_date)
            weights.to_csv(os.path.join(out_dir, csv_name))
    def cal_factor_barra_cumulative_range(self, beg_date, end_date):
        """Compute and save the cumulative-range factor.

        For each horizon of 1..12 "months" (21 rows per month) the rolling sum
        of daily log returns is taken. The factor is
        ``log(1 + max) - log(1 + min)`` of those sums across the 12 horizons.
        The raw result is saved under ``self.raw_factor_name_range``; the
        MAD-trimmed and standardised result under ``self.factor_name_range``.

        Args:
            beg_date: Currently unused; the whole history is recomputed.
                Kept so existing callers keep working.
            end_date: Currently unused; see ``beg_date``.
        """

        # param
        months = 12
        month_days = 21

        # Pct_chg is divided by 100 before the log, i.e. it is treated as a
        # percentage return.
        # NOTE(review): after the transpose the rows are presumably dates and
        # the columns stock codes, so the rolling window runs over time —
        # confirm against Stock().read_factor_h5.
        pct_chg = np.log(Stock().read_factor_h5("Pct_chg") / 100 + 1).T

        # pandas.Panel has been removed from pandas, so instead of stacking
        # the 12 frames the extrema are tracked with running element-wise
        # reductions. np.fmax / np.fmin skip NaN like a skipna max / min.
        running_max = None
        running_min = None
        for month in range(1, months + 1):
            window_sum = pct_chg.rolling(month_days * month).sum().values
            if running_max is None:
                running_max = window_sum
                running_min = window_sum
            else:
                running_max = np.fmax(running_max, window_sum)
                running_min = np.fmin(running_min, window_sum)

        spread = np.log(running_max + 1) - np.log(running_min + 1)
        res = pd.DataFrame(spread,
                           index=pct_chg.index,
                           columns=pct_chg.columns)

        # Keep only rows for which at least one horizon produced a value
        # (the original dropped all-NaN rows of every horizon before merging).
        has_data = ~np.isnan(running_max).all(axis=1)
        res = res.loc[has_data, :].T

        self.save_risk_factor_exposure(res, self.raw_factor_name_range)
        res = Stock().remove_extreme_value_mad(res)
        res = Stock().standardization(res)
        self.save_risk_factor_exposure(res, self.factor_name_range)
Пример #3
0
    def generate_patch_file(self, factor_name, beg_date, end_date):

        """Export a risk factor's exposure as one text file per trade date.

        Any existing ``<exposure_txt_path>/<factor_name>`` directory is
        removed first. For every trade date between ``beg_date`` and
        ``end_date`` that has at least one non-NaN value, the values (rounded
        to 6 decimals) are written to ``<factor_name>_<date>.txt``.

        Args:
            factor_name: Name of the factor to load and export.
            beg_date: First date (inclusive) of the column slice.
            end_date: Last date (inclusive) of the column slice.
        """

        exposure = self.get_risk_factor_exposure(factor_name)
        exposure = exposure.loc[:, beg_date:end_date]
        trade_dates = Date().get_trade_date_series(beg_date, end_date)
        trade_dates = sorted(set(trade_dates) & set(exposure.columns))

        # Start from a clean directory so files from earlier runs never linger.
        out_dir = os.path.join(self.exposure_txt_path, factor_name)
        if os.path.exists(out_dir):
            shutil.rmtree(out_dir)

        for date in trade_dates:
            daily = pd.DataFrame(exposure[date])
            daily.columns = [factor_name]
            daily = daily.dropna().round(6)

            # Nothing to export for this date.
            if len(daily) == 0:
                continue

            print("Patch Txt File %s %s" % (factor_name, date))

            # The directory is created lazily, only once there is output.
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            target = os.path.join(out_dir, "%s_%s.txt" % (factor_name, date))
            PandasToTxt().to_txt(daily, target)
Пример #4
0
    def cal_factor_exposure(self, beg_date, end_date):
        """Compute and save the bias of expected net-profit growth.

        For each trade date the factor is
        ``(mean over last 5 rows - mean over last 35 rows) / (1 + std)``,
        where ``std`` is taken over the older part of the long window (from
        the long-window start up to the short-window start).

        The original implementation built this result but then saved the raw
        input table instead; the computed factor is now what gets saved.

        Args:
            beg_date: First date of the range to compute.
            end_date: Last date of the range to compute.
        """

        long_term = 35
        short_term = 5

        # read data (transposed so that rows are matched against trade dates)
        ltg = Stock().read_factor_h5("ExpectedNetProfitYoY").T
        date_series = Date().get_trade_date_series(beg_date, end_date)
        date_series = sorted(set(date_series) & set(ltg.index))

        # Collect per-date columns and concatenate once instead of growing a
        # DataFrame inside the loop.
        daily_frames = []

        for current_date in date_series:

            long_beg_date = Date().get_trade_date_offset(
                current_date, -(long_term - 1))
            short_beg_date = Date().get_trade_date_offset(
                current_date, -(short_term - 1))

            long_mean = ltg.loc[long_beg_date:current_date, :].mean()
            short_mean = ltg.loc[short_beg_date:current_date, :].mean()
            std = ltg.loc[long_beg_date:short_beg_date, :].std()

            bias = (short_mean - long_mean) / (1 + std)
            daily_frames.append(bias.to_frame(name=current_date))

        if daily_frames:
            result = pd.concat(daily_frames, axis=1)
        else:
            result = pd.DataFrame()

        # save data: drop dates for which no stock has a value
        result = result.T.dropna(how='all').T
        self.save_alpha_factor_exposure(result, self.raw_factor_name)
Пример #5
0
def TurnOverBias6m3m(beg_date, end_date):

    """Compute the turnover-bias factor and write it to the factor store.

    Factor: mean daily turnover over the last 120 trade dates minus the mean
    over the last 60 trade dates. The original note says the function name
    is wrong and may be renamed later; it is kept for callers.

    A date is only computed when at least 80% of the long window is available;
    otherwise it yields an all-NaN column, which is dropped before saving.

    Args:
        beg_date: First date of the range to compute.
        end_date: Last date of the range to compute.

    Returns:
        DataFrame with one column per computed trade date. When no trade date
        in the range is present in the data, an empty frame is returned and
        nothing is written.
    """

    # param
    long_term = 120
    short_term = 60
    factor_name = "TurnOverBias6m3m"

    # read data (transposed so that rows are matched against trade dates)
    turn_over = Stock().get_factor_h5("TurnOver_Daily", None, "primary_mfc").T

    # calculate data daily
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = sorted(set(turn_over.index) & set(date_series))

    # Previously this case crashed with a NameError on the unbound result.
    if not date_series:
        return pd.DataFrame(index=turn_over.columns)

    daily_frames = []

    for current_date in date_series:

        data_beg_date_long = Date().get_trade_date_offset(
            current_date, -(long_term - 1))
        data_beg_date_short = Date().get_trade_date_offset(
            current_date, -(short_term - 1))

        # Label-based row slices (.ix is no longer available in pandas);
        # columns without any value inside the window are dropped.
        turn_over_long = turn_over.loc[data_beg_date_long:current_date, :]
        turn_over_long = turn_over_long.T.dropna(how='all').T
        turn_over_short = turn_over.loc[data_beg_date_short:current_date, :]
        turn_over_short = turn_over_short.T.dropna(how='all').T

        if len(turn_over_long) >= int(0.8 * long_term):
            print('Calculating factor %s at date %s' % (factor_name, current_date))
            turn_over_diff = turn_over_long.mean() - turn_over_short.mean()
            frame = turn_over_diff.to_frame(name=current_date)
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
            frame = pd.DataFrame([], columns=[current_date], index=turn_over.columns)

        daily_frames.append(frame)

    res = pd.concat(daily_frames, axis=1)
    res = res.T.dropna(how='all').T

    # save data
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
Пример #6
0
    def cal_factor_exposure(self, beg_date=None, end_date=None):
        """Compute and save the size-orthogonalised cubed-size factor.

        For each trade date the cube of the ``cne5_normal_size`` exposure is
        regressed (OLS with intercept) on the exposure itself; the residual is
        the raw factor. The result is MAD-trimmed, standardised and saved
        under ``self.factor_name``.

        Dates with no valid observation are skipped; previously the
        regression on an empty sample aborted the whole run.

        Args:
            beg_date: First date of the range, passed to the trade calendar.
            end_date: Last date of the range, passed to the trade calendar.
        """

        # read data
        size_data = self.get_risk_factor_exposure("cne5_normal_size")
        cube_size_data = size_data**3

        date_series = Date().get_trade_date_series(beg_date, end_date)
        date_series = sorted(set(date_series) & set(size_data.columns))

        # Collect per-date residual columns and concatenate once.
        daily_frames = []

        # calculate everyday
        for date in date_series:

            print('Calculating Barra Risk factor %s at date %s' %
                  (self.factor_name, date))
            regression_data = pd.concat(
                [size_data[date], cube_size_data[date]], axis=1)
            regression_data.columns = ['x', 'y']
            regression_data = regression_data.dropna()

            # OLS cannot be fitted without observations.
            if len(regression_data) == 0:
                continue

            y = regression_data['y'].values
            x_add = sm.add_constant(regression_data['x'].values)
            model = sm.OLS(y, x_add).fit()

            residual = regression_data['y'] - model.fittedvalues
            daily_frames.append(residual.to_frame(name=date))

        if daily_frames:
            res_data = pd.concat(daily_frames, axis=1)
        else:
            res_data = pd.DataFrame([])

        res_data = res_data.T.dropna(how='all').T
        res_data = FactorPreProcess().remove_extreme_value_mad(res_data)
        res_data = FactorPreProcess().standardization(res_data)
        self.save_risk_factor_exposure(res_data, self.factor_name)
Пример #7
0
    def change_quarter_to_daily_with_disclosure_date(data,
                                                     report_data,
                                                     beg_date=None,
                                                     end_date=None):
        """Expand quarterly data to daily data using disclosed report dates.

        For every trade date, ``report_data`` gives each stock a numeric
        report-date value. That value, converted with ``str(int(value))``,
        selects the column of ``data`` whose figure becomes the stock's daily
        value.

        Args:
            data: Quarterly table; rows are stocks, columns are report-date
                labels (presumably strings such as ``"20180331"`` — confirm).
            report_data: Table whose columns are trade dates and whose values
                are the numeric report date to use for each stock.
            beg_date: First trade date; defaults to the first column of
                ``data``.
            end_date: Last trade date; defaults to today.

        Returns:
            DataFrame with one column per trade date. Empty when no trade
            date in the range is present in ``report_data``.
        """

        data = data.dropna(how='all')
        report_data = report_data.dropna(how='all')

        if beg_date is None:
            beg_date = data.columns[0]
        if end_date is None:
            end_date = datetime.today()

        date_series = Date().get_trade_date_series(beg_date, end_date)
        date_series = sorted(set(report_data.columns) & set(date_series))

        known_stocks = set(data.index)
        daily_frames = []

        for date_daily in date_series:

            report_data_val = report_data[date_daily].dropna()
            report_date_list = list(set(report_data_val.values))
            print("Calculate Daily Data at %s with %s " %
                  (date_daily, report_date_list))

            # One piece per distinct report date used on this trade date.
            pieces = []

            for report_date_number in report_date_list:

                report_date = str(int(report_date_number))
                stock_index = report_data_val[
                    report_data_val == report_date_number].index
                stock_index = sorted(set(stock_index) & known_stocks)

                try:
                    data_ttm = data.loc[stock_index, report_date]
                except KeyError:
                    # The quarterly table has no column for this report date.
                    data_ttm = pd.DataFrame([], columns=[date_daily])
                else:
                    data_ttm = pd.DataFrame(data_ttm.values,
                                            columns=[date_daily],
                                            index=data_ttm.index)
                pieces.append(data_ttm)

            if pieces:
                res = pd.concat(pieces, axis=0)
                res = res.loc[~res.index.duplicated(keep='first'), :]
                res = res.sort_index()
            else:
                # Build a fresh empty column so a date without report dates
                # never reuses the previous date's values.
                res = pd.DataFrame([], columns=[date_daily])

            daily_frames.append(res)

        if not daily_frames:
            return pd.DataFrame()
        return pd.concat(daily_frames, axis=1)
Пример #8
0
def lasso_fund_pool():

    """Tabulate the summed weight of each fund's ten largest holdings.

    For every quarter-period date since 2004-12-31 and every fund, holdings
    are ranked by ``Weight``; when a fund reports at least ten holdings on
    that date, the sum of the ten largest weights is stored. The fund x date
    table is written to CSV.
    """

    fund_holder = Fund().get_fund_holding_all()

    # NOTE(review): the original read the fund universe from ``code_list``
    # before that name was assigned (UnboundLocalError) and also loaded the
    # "Stock_Ratio" factor for 20180331 without using it. The universe is now
    # taken from the holdings table itself — confirm whether it should be
    # restricted to a specific fund pool instead.
    code_list = sorted(fund_holder.FundCode.dropna().unique())
    date_list = sorted(Date().get_normal_date_series(
        beg_date="20041231", end_date=datetime.today(), period="Q"))

    result = pd.DataFrame([], index=code_list, columns=date_list)

    for date in date_list:

        # Filter by date once per date instead of once per (date, fund).
        holder_on_date = fund_holder[fund_holder.Date == date]

        for fund_code in code_list:

            holder = holder_on_date[holder_on_date.FundCode == fund_code]
            if len(holder) < 10:
                continue

            # Positional slice: exactly the ten largest weights. The former
            # label-inclusive ``.ix[0:10]`` picked up an eleventh row.
            top10_weight = holder.Weight.sort_values(
                ascending=False).iloc[:10].sum()
            result.loc[fund_code, date] = top10_weight
            print("计算 %s 在 %s 的前10大重仓股票为 %s" % (fund_code, date, top10_weight))

    # NOTE(review): ``path`` is not defined in this function; presumably a
    # module-level output path — confirm.
    result.to_csv(path + '')
Пример #9
0
    def cal_factor_exposure(self, beg_date, end_date):
        """Compute and save the reciprocal of ``PE_ttm`` per trade date.

        Zero PE values are excluded before inverting, so those stocks have no
        value on that date. Dates for which no stock has a value are dropped
        before the result is saved under ``self.raw_factor_name``.

        Args:
            beg_date: First date of the range to compute.
            end_date: Last date of the range to compute.
        """

        # read data
        pe_ttm = Stock().read_factor_h5("PE_ttm")

        # dates to compute: trade dates that exist as columns in the data
        trade_dates = Date().get_trade_date_series(beg_date, end_date)
        trade_dates = sorted(set(pe_ttm.columns) & set(trade_dates))

        exposure = pd.DataFrame()

        for current_date in trade_dates:
            print('Calculating factor %s at date %s' %
                  (self.raw_factor_name, current_date))
            pe_today = pe_ttm[current_date]
            # Exclude exact zeros so the division below is always defined.
            nonzero_pe = pe_today[pe_today != 0.0]
            ep_today = (1.0 / nonzero_pe).to_frame(name=current_date)
            exposure = pd.concat([exposure, ep_today], axis=1)

        exposure = exposure.T.dropna(how='all').T
        self.save_alpha_factor_exposure(exposure, self.raw_factor_name)
Пример #10
0
    def cal_factor_exposure(self, beg_date, end_date):
        """Compute and save the mean of |return| / trade amount per stock.

        For each trade date a window reaching back 19 trade dates is used.
        Stocks with 10 or more zero-amount rows in the window are excluded,
        ratios above 80 are discarded, and the remaining ratios are averaged.
        Trade amount is scaled by 1e8 before use.

        Args:
            beg_date: First date of the range to compute.
            end_date: Last date of the range to compute.
        """

        # param
        long_term = 20
        effective_term = int(long_term / 2)
        extreme_value = 80

        # read data
        pct = Stock().read_factor_h5("Pct_chg").T
        trade_amount = Stock().read_factor_h5("TradeAmount").T / 100000000

        # align both tables, then treat a missing amount as no trading
        pct, trade_amount = Stock().make_same_index_columns(
            [pct, trade_amount])
        trade_amount = trade_amount.fillna(0.0)

        # dates to compute
        trade_dates = Date().get_trade_date_series(beg_date, end_date)
        trade_dates = sorted(set(trade_dates) & set(pct.index))
        exposure = pd.DataFrame()

        for current_date in trade_dates:

            window_start = Date().get_trade_date_offset(
                current_date, -(long_term - 1))
            amount_window = trade_amount.loc[window_start:current_date, :]

            if len(amount_window) <= effective_term:
                # Too few rows in the window: emit an all-NaN column.
                print('Calculating factor %s at date %s is null' %
                      (self.raw_factor_name, current_date))
                bias = pd.DataFrame([],
                                    columns=[current_date],
                                    index=amount_window.columns)
            else:
                print('Calculating factor %s at date %s' %
                      (self.raw_factor_name, current_date))
                # Number of zero-amount rows per stock inside the window.
                zero_days = amount_window.eq(0.0).sum()
                liquid_codes = zero_days[zero_days < effective_term].index

                amount_part = trade_amount.loc[window_start:current_date,
                                               liquid_codes]
                pct_part = pct.loc[window_start:current_date, liquid_codes]

                ratio = pct_part.abs().div(amount_part)
                # Values above the cap are treated as missing.
                ratio = ratio.mask(ratio > extreme_value)
                bias = ratio.mean().to_frame(name=current_date)

            exposure = pd.concat([exposure, bias], axis=1)

        exposure = exposure.T.dropna(how='all').T
        self.save_alpha_factor_exposure(exposure, self.raw_factor_name)
Пример #11
0
def VolumeMean20d(beg_date, end_date):
    """Compute the 20-trade-date mean trade amount and write it to the store.

    Factor: average of ``TradeAmount`` over the window that starts 19 trade
    dates before each date and ends on that date.

    Args:
        beg_date: First date of the range to compute.
        end_date: Last date of the range to compute.

    Returns:
        DataFrame with one column per computed trade date. When no trade date
        in the range is present in the data, an empty frame is returned and
        nothing is written.
    """

    # param
    long_term = 20
    factor_name = "VolumeMean20d"

    # read data
    trade_amount = Stock().get_factor_h5("TradeAmount", None, "primary_mfc")

    # calculate data daily
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = sorted(set(trade_amount.columns) & set(date_series))

    # Previously this case crashed with a NameError on the unbound result.
    if not date_series:
        return pd.DataFrame(index=trade_amount.index)

    daily_frames = []

    for current_date in date_series:

        data_beg_date = Date().get_trade_date_offset(current_date,
                                                     -(long_term - 1))
        # Label-based column slice (.ix is no longer available in pandas).
        trade_amount_before = trade_amount.loc[:, data_beg_date:current_date]

        # Every date in date_series is a column of trade_amount by
        # construction, so the former "is null" branch could never run.
        print('Calculating factor %s at date %s' %
              (factor_name, current_date))
        avg_trade_amount = trade_amount_before.mean(axis=1)
        daily_frames.append(avg_trade_amount.to_frame(name=current_date))

    res = pd.concat(daily_frames, axis=1)
    res = res.T.dropna(how='all').T

    # save data
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
Пример #12
0
def THSBias(beg_date, end_date):

    """Compute the click-count bias factor and write it to the factor store.

    Factor: the negative of (mean click count over the last 10 rows of the
    window minus the mean over its first 30 rows), where the window reaches
    back 39 trade dates. Stocks with 20 or fewer observations in the window
    are set to NaN.

    Args:
        beg_date: First date of the range to compute.
        end_date: Last date of the range to compute.

    Returns:
        DataFrame with one column per computed trade date. When no trade date
        in the range is present in the data, an empty frame is returned and
        nothing is written.
    """

    # param
    long_term = 40
    half_term = int(long_term / 2)
    factor_name = "THSBias"

    # read data
    click_num = Stock().get_factor_h5("click_num", None, "primary_mfc")

    # calculate data daily
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = sorted(set(date_series) & set(click_num.columns))

    # Previously this case crashed with a NameError on the unbound result.
    if not date_series:
        return pd.DataFrame(index=click_num.index)

    daily_frames = []

    for current_date in date_series:

        data_beg_date = Date().get_trade_date_offset(current_date,
                                                     -(long_term - 1))
        # Label-based column slice (.ix is no longer available in pandas);
        # after the transpose, rows with no value at all are dropped.
        data_period = click_num.loc[:, data_beg_date:current_date]
        data_period = data_period.T.dropna(how='all')

        if len(data_period) > half_term:
            print('Calculating factor %s at date %s' % (factor_name, current_date))
            # Positional row slices, replacing the integer slices on .ix.
            # NOTE(review): assumes the date labels are not integers, so .ix
            # resolved these slices by position — confirm.
            # NOTE(review): with fewer than 40 rows the two slices overlap.
            data_date_pre30 = data_period.iloc[0:30, :].mean()
            data_date_next10 = data_period.iloc[-10:, :].mean()
            data_date = -(data_date_next10 - data_date_pre30)

            # Blank out stocks with too few observations in the window.
            effective_number = data_period.count()
            data_date[effective_number <= half_term] = np.nan
            frame = data_date.to_frame(name=current_date)
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
            frame = pd.DataFrame([], columns=[current_date], index=click_num.index)

        daily_frames.append(frame)

    res = pd.concat(daily_frames, axis=1)
    res = res.T.dropna(how='all').T

    # save data
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
Пример #13
0
def Skewness(beg_date, end_date):

    """Compute the negative return-skewness factor and write it to the store.

    Factor: -1 * skewness of ``Pct_chg`` over the window that starts 149
    trade dates before each date. Stocks with 120 or fewer observations in
    the window are set to NaN; a date whose window has 120 or fewer rows
    yields an all-NaN column, which is dropped before saving.

    Args:
        beg_date: First date of the range to compute.
        end_date: Last date of the range to compute.

    Returns:
        DataFrame with one column per computed trade date. When no trade date
        in the range is present in the data, an empty frame is returned and
        nothing is written.
    """

    # param
    long_term = 150
    minimum_size = 120
    factor_name = "Skewness"

    # read data
    pct = Stock().get_factor_h5("Pct_chg", None, "primary_mfc")

    # calculate data daily
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = sorted(set(pct.columns) & set(date_series))

    # Previously this case crashed with a NameError on the unbound result.
    if not date_series:
        return pd.DataFrame(index=pct.index)

    daily_frames = []

    for current_date in date_series:

        data_beg_date = Date().get_trade_date_offset(current_date,
                                                     -(long_term - 1))
        # Label-based column slice (.ix is no longer available in pandas);
        # after the transpose, rows with no value at all are dropped.
        pct_before = pct.loc[:, data_beg_date:current_date]
        pct_stock = pct_before.T.dropna(how='all')

        if len(pct_stock) > minimum_size:
            print('Calculating factor %s at date %s' % (factor_name, current_date))
            skew_date = -pct_stock.skew()

            # Blank out stocks with too few observations in the window.
            effective_number = pct_stock.count()
            skew_date[effective_number <= minimum_size] = np.nan
            frame = skew_date.to_frame(name=current_date)
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
            frame = pd.DataFrame([], columns=[current_date], index=pct.index)

        daily_frames.append(frame)

    res = pd.concat(daily_frames, axis=1)
    res = res.T.dropna(how='all').T

    # save data
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
    def cal_factor_exposure(self, beg_date, end_date):
        """Combine the residual-volatility components into one risk factor.

        The three component factors are recomputed, then combined as
        ``0.74 * std + 0.16 * cumulative_range + 0.10 * hsigma``. A missing
        component counts as 0 as long as at least one component is present.
        Per trade date the combination is regressed (OLS with intercept) on
        the size and beta exposures; the residual is MAD-trimmed,
        standardised and saved under ``self.factor_name``.

        Args:
            beg_date: First date of the range to compute.
            end_date: Last date of the range to compute.
        """

        # Refresh the three stored components first.
        self.cal_factor_barra_std(beg_date, end_date)
        self.cal_factor_barra_cumulative_range(beg_date, end_date)
        self.cal_factor_barra_hsigma(beg_date, end_date)

        weighted_std = 0.74 * self.get_risk_factor_exposure(
            "cne5_normal_res_vol_std")
        weighted_range = 0.16 * self.get_risk_factor_exposure(
            "cne5_normal_res_vol_cumulative_range")
        weighted_hsigma = 0.10 * self.get_risk_factor_exposure(
            "cne5_normal_res_vol_hsigma")

        size_data = self.get_risk_factor_exposure("cne5_normal_size")
        beta_data = self.get_risk_factor_exposure("cne5_normal_beta")

        combined = weighted_std.add(weighted_range, fill_value=0.0)
        combined = combined.add(weighted_hsigma, fill_value=0.0)
        combined = combined.T.dropna(how='all').T

        # Only dates available in every input can be regressed.
        trade_dates = Date().get_trade_date_series(beg_date, end_date)
        common_dates = (set(size_data.columns) & set(combined.columns)
                        & set(beta_data.columns) & set(trade_dates))
        trade_dates = sorted(common_dates)

        orthogonalized = pd.DataFrame([])

        for date in trade_dates:

            print('Calculating Barra Risk factor %s at date %s' %
                  (self.factor_name, date))
            sample = pd.concat(
                [size_data[date], beta_data[date], combined[date]], axis=1)
            sample.columns = ['size', 'beta', 'residual_volatility']
            sample = sample.dropna()

            # Nothing to regress on this date.
            if len(sample) == 0:
                continue

            target = sample['residual_volatility']
            regressors = sm.add_constant(sample[['size', 'beta']].values)
            fit = sm.OLS(target.values, regressors).fit()

            residual = (target - fit.fittedvalues).to_frame(name=date)
            orthogonalized = pd.concat([orthogonalized, residual], axis=1)

        # save data
        res = Stock().remove_extreme_value_mad(orthogonalized)
        res = Stock().standardization(res)
        self.save_risk_factor_exposure(res, self.factor_name)
Пример #15
0
    def wind_file(self):
        """Write one Wind-style portfolio CSV per selected trade date.

        For each trade date returned for period ``"M"`` that exists in both
        ``self.alpha_data`` and ``self.free_mv``:

        1. keep the top 60% of stocks by square-rooted free market value;
        2. if more than 150 stocks remain, keep the top tenth by alpha;
        3. weight those stocks proportionally to the square-rooted value and
           write them to ``<port_name>_<next trade date>.csv``.

        Dates with 150 or fewer remaining stocks produce no file.
        """

        first_date = self.alpha_data.columns[0]
        last_date = self.alpha_data.columns[-1]
        trade_dates = Date().get_trade_date_series(first_date, last_date, "M")
        trade_dates = sorted(
            set(trade_dates) & set(self.free_mv.columns)
            & set(self.alpha_data.columns))

        for date in trade_dates:

            print(date)
            alpha_date = pd.DataFrame(self.alpha_data[date])
            alpha_date.columns = ['Alpha']

            mv_date = pd.DataFrame(self.free_mv[date])
            mv_date.columns = ['FreeMV']
            mv_date['FreeMV'] = mv_date['FreeMV'].map(np.sqrt)

            data = pd.concat([alpha_date, mv_date], axis=1).dropna()

            # Drop the stocks with the smallest free market value.
            data = data.sort_values(by=['FreeMV'], ascending=False)
            keep_count = int(len(data) * 0.60)
            data = data.iloc[0:keep_count, :]

            data = data.sort_values(by=['Alpha'], ascending=False)

            sub_path = os.path.join(self.wind_port_path, self.port_name)
            if not os.path.exists(sub_path):
                os.makedirs(sub_path)

            # Too few candidates: no portfolio file for this date.
            if len(data) <= 150:
                continue

            top_count = int(len(data) / 10)
            data = data.iloc[0:top_count, :]
            print("Generate File %s" % date, len(data))

            # The file is stamped with the trade date following the data date.
            next_date = Date().get_trade_date_offset(date, 1)
            data['Weight'] = data['FreeMV'] / data['FreeMV'].sum()
            data.index.name = 'Code'
            data["CreditTrading"] = "No"
            data["Date"] = next_date
            data["Price"] = 0.0
            data["Direction"] = "Long"

            csv_name = '%s_%s.csv' % (self.port_name, next_date)
            data.to_csv(os.path.join(sub_path, csv_name))
Пример #16
0
def ReturnBetweendayLn(beg_date, end_date):
    """
    Compute and store the overnight log-return factor ``ReturnBetweendayLn``.

    For each trade date the factor is
    ``log(adjusted open today / adjusted close of the previous trade date) * 100``.

    Args:
        beg_date: first date of the calculation range.
        end_date: last date of the calculation range.

    Returns:
        DataFrame indexed like the close-price table with one column per
        calculated date; the same frame is written through
        ``Stock().write_factor_h5`` under the ``"alpha_dfc"`` label.

    NOTE(review): the original docstring mentions linear weighting; no
    weighting is applied in this function.
    """

    # param
    factor_name = 'ReturnBetweendayLn'

    # read data (``open_price`` rather than ``open`` so the builtin is not shadowed)
    close = Stock().get_factor_h5("PriceCloseAdjust", None, "alpha_dfc")
    open_price = Stock().get_factor_h5("PriceOpenAdjust", None, "alpha_dfc")

    # data processing: align both tables on the same index and columns
    [close, open_price] = Stock().make_same_index_columns([close, open_price])

    # calculate data daily
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = sorted(set(date_series) & set(close.columns))

    res = pd.DataFrame([], columns=date_series, index=close.index)

    # The first date of the range is skipped, as in the original loop;
    # its all-NaN column is dropped below.
    for current_date in date_series[1:]:

        before_date = Date().get_trade_date_offset(current_date, -1)

        # current_date is always a close column (date_series was intersected
        # with them); the lookup that can actually fail is the previous day's
        # close, so guard that one instead of raising KeyError.
        if before_date in close.columns and current_date in open_price.columns:

            print('Calculating factor %s at date %s' %
                  (factor_name, current_date))

            close_yes = close[before_date]
            open_today = open_price[current_date]
            res[current_date] = (open_today / close_yes).map(np.log) * 100
        else:
            print('Calculating factor %s at date %s is null' %
                  (factor_name, current_date))

    # drop dates that ended up with no values at all
    res = res.T.dropna(how='all').T

    # save data
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
    def cal_factor_exposure(self, beg_date, end_date):

        """
        Compute and save the raw factor exposure.

        For each trade date the value is the 10-day sum of "Mf_Inflow" divided
        by the 10-day sum of free market value ("Price_Unadjust" times
        "Free_FloatShare"), in percent. Dates with fewer than 8 rows in the
        window get an empty column. The result is passed to
        ``self.save_alpha_factor_exposure``.
        """

        # param
        window = 10
        min_rows = int(0.8 * window)

        # read data
        inflow = Stock().read_factor_h5("Mf_Inflow")
        price_unadjust = Stock().read_factor_h5("Price_Unadjust")
        free_share = Stock().read_factor_h5("Free_FloatShare")

        # free market value = unadjusted price * free float shares
        [price_unadjust, free_share] = Stock().make_same_index_columns([price_unadjust, free_share])
        free_mv = price_unadjust.mul(free_share)
        [inflow, free_mv] = Stock().make_same_index_columns([inflow, free_mv])

        # after transposing, rows are dates and columns are codes
        inflow, free_mv = inflow.T, free_mv.T

        # calculate data daily
        trade_dates = Date().get_trade_date_series(beg_date, end_date)
        trade_dates = sorted(set(trade_dates) & set(inflow.index))
        res = pd.DataFrame()

        for current_date in trade_dates:

            window_start = Date().get_trade_date_offset(current_date, -(window - 1))
            inflow_win = inflow.loc[window_start:current_date, :]
            free_mv_win = free_mv.loc[window_start:current_date, :]

            if len(inflow_win) < min_rows:
                print('Calculating factor %s at date %s is null' % (self.raw_factor_name, current_date))
                column = pd.DataFrame([], columns=[current_date], index=free_mv.columns)
            else:
                print('Calculating factor %s at date %s' % (self.raw_factor_name, current_date))
                totals = pd.concat([inflow_win.sum(), free_mv_win.sum()], axis=1)
                totals.columns = ['inflow', 'free_mv']
                # avoid dividing by a zero market value
                totals = totals[totals['free_mv'] != 0.0]
                ratio = totals['inflow'] / totals['free_mv']
                column = pd.DataFrame(ratio) * 100
                column.columns = [current_date]

            res = pd.concat([res, column], axis=1)

        # drop dates without any value
        res = res.T.dropna(how='all').T
        self.save_alpha_factor_exposure(res, self.raw_factor_name)
Пример #18
0
    def cal_factor_exposure(self, beg_date, end_date):
        """
        Compute and save the raw factor exposure.

        For each trade date the value is minus the coefficient of variation
        (std / mean) of "TradeAmount" over a 40-day window, restricted to codes
        with fewer than 20 zero-or-missing days in that window. Windows with
        fewer than 32 rows produce an empty column.
        """

        # params
        window = 40
        max_zero_days = int(window * 0.5)
        min_rows = int(window * 0.8)

        # read data (rows: dates, columns: codes after the transpose)
        trade_amount = Stock().read_factor_h5("TradeAmount").T
        trade_amount = trade_amount.dropna(how='all')

        # calculate data daily
        trade_dates = Date().get_trade_date_series(beg_date, end_date)
        trade_dates = sorted(set(trade_amount.index) & set(trade_dates))
        res = pd.DataFrame([])

        for current_date in trade_dates:

            window_start = Date().get_trade_date_offset(
                current_date, -(window - 1))
            filled = trade_amount.loc[window_start:current_date, :].fillna(0.0)

            if len(filled) < min_rows:
                print('Calculating factor %s at date %s is null' %
                      (self.raw_factor_name, current_date))
                column = pd.DataFrame([],
                                      columns=[current_date],
                                      index=trade_amount.columns)
            else:
                print('Calculating factor %s at date %s' %
                      (self.raw_factor_name, current_date))

                # count zero (or originally missing) days per code
                zero_days = (filled == 0.0).sum()
                active_codes = zero_days[zero_days < max_zero_days].index

                # statistics use the unfilled data so NaN days are ignored
                amounts = trade_amount.loc[window_start:current_date,
                                           active_codes]
                column = pd.DataFrame(-amounts.std() / amounts.mean())
                column.columns = [current_date]

            res = pd.concat([res, column], axis=1)

        # drop dates without any value
        res = res.T.dropna(how='all').T
        self.save_alpha_factor_exposure(res, self.raw_factor_name)
Пример #19
0
    def cal_return_all(self, beg_date, end_date, risk_model_name, period='D'):
        """
        Compute factor returns, factor exposures and stock residual returns
        for every date in the range and persist them as CSV files.

        Per date, ``self.cal_return_date`` supplies the factor return, the
        exposure of the previous trade date and the residual return. Exposures
        are written to ``exposure_<previous date>.csv``; factor returns (plus
        their cumulative sum) and residual returns are merged with any existing
        file via ``Stock().pandas_add_row`` and rewritten.
        """

        self.set_model_name(risk_model_name)
        self.get_data_all(risk_model_name)

        # keep only dates available in every input table
        calendar = Date().get_trade_date_series(beg_date, end_date, period)
        usable = set(calendar)
        for table in (self.pct_chg, self.free_mv_data,
                      self.trade_status, self.industry):
            usable = usable & set(table.columns)
        trade_dates = sorted(usable)

        factor_return = pd.DataFrame()
        res_return = pd.DataFrame()

        # exposure files
        for date in trade_dates:

            before_date = Date().get_trade_date_offset(date, -1)
            day_result = self.cal_return_date(date)
            factor_return_date, exposure_before_date, res_return_date = day_result

            exposure_name = "exposure_%s.csv" % before_date
            exposure_before_date.to_csv(
                os.path.join(self.exposure_path, exposure_name))

            factor_return = pd.concat([factor_return, factor_return_date],
                                      axis=1)
            res_return = pd.concat([res_return, res_return_date], axis=1)

        # factor return files (dates as rows)
        factor_return = factor_return.T
        factor_file = os.path.join(self.factor_return_path,
                                   "factor_return.csv")
        if os.path.exists(factor_file):
            factor_return = Stock().pandas_add_row(self.get_factor_return(),
                                                   factor_return)
        factor_return.to_csv(factor_file)

        cum_file = os.path.join(self.factor_return_path,
                                "factor_return_cum.csv")
        factor_return.cumsum().to_csv(cum_file)

        # stock residual return file (dates as rows)
        res_return = res_return.T
        residual_file = os.path.join(self.res_return_path,
                                     "stock_residual_return.csv")
        if os.path.exists(residual_file):
            res_return = Stock().pandas_add_row(
                self.get_stock_residual_return(), res_return)
        res_return.to_csv(residual_file)
Пример #20
0
    def cal_factor_exposure(self, beg_date, end_date):

        """
        Liquidity factor LIQUIDITY.

        LIQUIDITY = 0.35 * LIQUIDITY_STOM + 0.35 * LIQUIDITY_STOQ
                    + 0.30 * LIQUIDITY_STOA

        The combined value is then regressed cross-sectionally on the SIZE
        factor ("cne5_normal_size") for each date and the residual is kept.
        Residuals go through MAD extreme-value removal and standardization
        before being saved under ``self.factor_name``.
        """

        # refresh the three component factors first
        self.cal_factor_liquidity_month()
        self.cal_factor_liquidity_quarter()
        self.cal_factor_liquidity_yearly()

        # weighted combination; a missing component counts as 0.0
        stom = 0.35 * self.get_risk_factor_exposure(self.factor_name_month)
        stoq = 0.35 * self.get_risk_factor_exposure(self.factor_name_quarter)
        stoa = 0.30 * self.get_risk_factor_exposure(self.factor_name_yearly)

        liquidity = stom.add(stoq, fill_value=0.0).add(stoa, fill_value=0.0)
        liquidity = liquidity.T.dropna(how='all').T

        # align with the size factor
        size_data = self.get_risk_factor_exposure("cne5_normal_size")
        [size_data, liquidity] = FactorPreProcess().make_same_index_columns([size_data, liquidity])

        trade_dates = Date().get_trade_date_series(beg_date, end_date)
        trade_dates = sorted(set(trade_dates) & set(liquidity.columns))

        turnover_res = pd.DataFrame([])

        for date in trade_dates:

            print('Calculating Barra Risk factor %s at date %s' % (self.factor_name, date))

            sample = pd.concat([size_data[date], liquidity[date]], axis=1)
            sample.columns = ['x', 'y']
            sample = sample.dropna()

            # OLS of liquidity on size with an intercept; keep the residual
            design = sm.add_constant(sample['x'].values)
            fit = sm.OLS(sample['y'].values, design).fit()
            sample['res'] = sample['y'] - fit.fittedvalues

            residual = pd.DataFrame(sample['res'])
            residual.columns = [date]
            turnover_res = pd.concat([turnover_res, residual], axis=1)

        preprocess = FactorPreProcess()
        turnover_res = preprocess.remove_extreme_value_mad(turnover_res)
        turnover_res = FactorPreProcess().standardization(turnover_res)
        self.save_risk_factor_exposure(turnover_res, self.factor_name)
Пример #21
0
    def cal_factor_exposure(self, beg_date, end_date):
        """
        Compute and save the raw factor exposure.

        Over a 28-day window of "Pct_chg", let ``up`` be the mean of the
        positive values and ``down`` the absolute mean of the non-positive
        values for each code. The factor is ``-(up / (up + down))``. Codes with
        22 or fewer valid observations are set to NaN, and windows with 22 or
        fewer rows yield an empty column.

        Args:
            beg_date: first date of the calculation range.
            end_date: last date of the calculation range.
        """

        # param
        term = 28
        effective_term = int(term * 0.8)

        # data (rows: codes, columns: dates)
        pct = Stock().read_factor_h5("Pct_chg")

        # calculate data daily
        date_series = Date().get_trade_date_series(beg_date, end_date)
        date_series = sorted(set(date_series) & set(pct.columns))
        res = pd.DataFrame()

        for current_date in date_series:

            data_beg_date = Date().get_trade_date_offset(
                current_date, -(term - 1))
            # `.ix` was removed in pandas 1.0; this is a label slice on the
            # date columns, which `.loc` expresses directly.
            data_period = pct.loc[:, data_beg_date:current_date]
            data_period = data_period.T.dropna(how='all')

            if len(data_period) > effective_term:

                print('Calculating factor %s at date %s' %
                      (self.raw_factor_name, current_date))
                data_positive = data_period[data_period > 0.0].mean()
                data_negative = -data_period[data_period <= 0.0].mean()
                data_sum = data_positive + data_negative
                # skip codes whose denominator is exactly zero
                code_list = data_sum[data_sum != 0.0].index
                data_date = data_positive[code_list] / data_sum[code_list]
                # not enough valid observations -> no factor value
                effective_number = data_period.count()
                data_date[effective_number <= effective_term] = np.nan
                data_date = -pd.DataFrame(data_date.values,
                                          columns=[current_date],
                                          index=data_date.index)
            else:
                print('Calculating factor %s at date %s is null' %
                      (self.raw_factor_name, current_date))
                data_date = pd.DataFrame([],
                                         columns=[current_date],
                                         index=pct.index)

            res = pd.concat([res, data_date], axis=1)

        # drop dates without any value
        res = res.T.dropna(how='all').T
        self.save_alpha_factor_exposure(res, self.raw_factor_name)
Пример #22
0
    def cal_factor_exposure(self, beg_date, end_date):
        """
        Compute and save the raw factor exposure.

        For each trade date the value is the 60-day mean of "TurnOver_Daily"
        minus its 20-day mean. Windows whose long slice has fewer than 48 rows
        produce an empty column.
        """

        # param
        long_window = 60
        short_window = 20
        min_rows = int(0.8 * long_window)

        # read data (rows: dates, columns: codes after the transpose)
        turn_over = Stock().read_factor_h5("TurnOver_Daily").T

        # calculate data daily
        trade_dates = Date().get_trade_date_series(beg_date, end_date)
        trade_dates = sorted(set(turn_over.index) & set(trade_dates))
        res = pd.DataFrame()

        for current_date in trade_dates:

            long_start = Date().get_trade_date_offset(
                current_date, -(long_window - 1))
            short_start = Date().get_trade_date_offset(
                current_date, -(short_window - 1))

            # slice each window, then drop codes with no data at all in it
            long_slice = turn_over.loc[long_start:current_date, :]
            long_slice = long_slice.T.dropna(how='all').T
            short_slice = turn_over.loc[short_start:current_date, :]
            short_slice = short_slice.T.dropna(how='all').T

            if len(long_slice) < min_rows:
                print('Calculating factor %s at date %s is null' %
                      (self.raw_factor_name, current_date))
                column = pd.DataFrame([],
                                      columns=[current_date],
                                      index=turn_over.columns)
            else:
                print('Calculating factor %s at date %s' %
                      (self.raw_factor_name, current_date))
                column = pd.DataFrame(long_slice.mean() - short_slice.mean())
                column.columns = [current_date]

            res = pd.concat([res, column], axis=1)

        # drop dates without any value
        res = res.T.dropna(how='all').T
        self.save_alpha_factor_exposure(res, self.raw_factor_name)
Пример #23
0
def stock_ratio_10(beg_date, end_date):
    """
    Compute the summed weight of each fund's ten largest holdings per quarter
    and merge the result into the "Stock_Ratio_10" factor file.

    Only funds whose "Stock_Ratio" at the last fund quarter date before
    ``end_date`` exceeds 65 are considered. A (fund, date) cell is filled only
    when the fund reports at least ten holdings on that date.

    Args:
        beg_date: first date of the quarterly date range.
        end_date: last date of the quarterly date range.
    """

    factor_name = "Stock_Ratio_10"
    fund_holder = Fund().get_fund_holding_all()

    quarter_date = Date().get_last_fund_quarter_date(end_date)

    position_all = Fund().get_fund_factor("Stock_Ratio", date_list=[quarter_date], fund_pool=None).T
    position_all.columns = ['Stock_Weight']
    position_all = position_all[position_all['Stock_Weight'] > 65]

    code_list = sorted(position_all.index)
    date_list = sorted(Date().get_normal_date_series(beg_date=beg_date, end_date=end_date, period="Q"))

    new_data = pd.DataFrame([], index=code_list, columns=date_list)

    for date in date_list:

        # filter by date once per date instead of once per (date, fund) pair
        holder_on_date = fund_holder[fund_holder.Date == date]

        for fund_code in code_list:

            holder = holder_on_date[holder_on_date.FundCode == fund_code]
            holder = holder.sort_values(by=['Weight'], ascending=False)

            if len(holder) >= 10:
                # Positional slice: exactly the ten largest weights. The former
                # `.ix[0:10, :]` was an inclusive label slice on the reset
                # index and therefore summed eleven rows; `.ix` is also gone
                # from pandas >= 1.0.
                top10_weight = holder['Weight'].iloc[:10].sum()
                new_data.loc[fund_code, date] = top10_weight
                print("计算 %s 在 %s 的前10大重仓股票为 %s" % (fund_code, date, top10_weight))

    out_file = Parameter().get_read_file(factor_name)

    if os.path.exists(out_file):
        data = pd.read_csv(out_file, encoding='gbk', index_col=[0])
        data.index = data.index.map(str)
        # NOTE(review): `pandas_add_row` is used as a bare name here while
        # other code calls Stock().pandas_add_row - confirm it is imported.
        data = pandas_add_row(data, new_data)
    else:
        print(" File No Exist ", factor_name)
        data = new_data

    data.to_csv(out_file)
Пример #24
0
    def cal_factor_exposure(self, beg_date, end_date):
        """
        Compute and save the raw factor exposure.

        Within a 40-day window of "click_num", the factor is minus the
        difference between the mean of the last 10 rows and the mean of the
        first 30 rows. Codes with 20 or fewer valid observations are set to
        NaN; windows with 20 or fewer rows yield an empty column.
        """

        # param
        window = 40
        head_rows = 30
        tail_rows = 10
        min_valid = int(window / 2)

        # read data (rows: codes, columns: dates)
        click_num = Stock().read_factor_h5("click_num")

        # calculate data daily
        trade_dates = Date().get_trade_date_series(beg_date, end_date)
        trade_dates = sorted(set(trade_dates) & set(click_num.columns))
        res = pd.DataFrame()

        for current_date in trade_dates:

            window_start = Date().get_trade_date_offset(
                current_date, -(window - 1))
            # rows become dates; drop dates with no data at all
            clicks = click_num.loc[:, window_start:current_date]
            clicks = clicks.T.dropna(how='all')

            if len(clicks) <= min_valid:
                print('Calculating factor %s at date %s is null' %
                      (self.raw_factor_name, current_date))
                column = pd.DataFrame([],
                                      columns=[current_date],
                                      index=click_num.index)
            else:
                print('Calculating factor %s at date %s' %
                      (self.raw_factor_name, current_date))
                early_mean = clicks.iloc[0:head_rows, :].mean()
                recent_mean = clicks.iloc[-tail_rows:, :].mean()
                change = -(recent_mean - early_mean)
                # not enough valid observations -> no factor value
                valid_count = clicks.count()
                change[valid_count <= min_valid] = np.nan
                column = pd.DataFrame(change)
                column.columns = [current_date]

            res = pd.concat([res, column], axis=1)

        # drop dates without any value
        res = res.T.dropna(how='all').T
        self.save_alpha_factor_exposure(res, self.raw_factor_name)
Пример #25
0
    def cal_factor_exposure(self, beg_date, end_date):
        """
        Compute and save the raw factor exposure.

        For each trade date the value is minus the 60-day standard deviation
        of the "FF3_R2" series, scaled by sqrt(250) / 100. Codes with 36 or
        fewer valid observations are set to NaN; windows with 36 or fewer rows
        yield an empty column.
        """

        # param
        window = 60
        min_valid = int(window * 0.6)

        # read data (rows: codes, columns: dates)
        ff3_r2 = FamaFrench().get_data("model_ff3", "FF3_R2")

        # calculate data daily
        trade_dates = Date().get_trade_date_series(beg_date, end_date)
        trade_dates = sorted(set(ff3_r2.columns) & set(trade_dates))
        res = pd.DataFrame()

        # NOTE(review): the original kept FamaFrench().cal_all_factor_pct() and
        # FamaFrench().ff3_model(beg_date, end_date) commented out here -
        # presumably the model output must be refreshed beforehand; confirm.

        for current_date in trade_dates:

            window_start = Date().get_trade_date_offset(
                current_date, -(window - 1))
            # rows become dates; drop dates with no data at all
            sample = ff3_r2.loc[:, window_start:current_date]
            sample = sample.T.dropna(how='all')

            if len(sample) <= min_valid:
                print('Calculating factor %s at date %s is null' %
                      (self.raw_factor_name, current_date))
                column = pd.DataFrame([],
                                      columns=[current_date],
                                      index=ff3_r2.index)
            else:
                print('Calculating factor %s at date %s' %
                      (self.raw_factor_name, current_date))
                scaled_std = -sample.std() * np.sqrt(250) / 100.0
                # not enough valid observations -> no factor value
                valid_count = sample.count()
                scaled_std[valid_count <= min_valid] = np.nan
                column = pd.DataFrame(scaled_std)
                column.columns = [current_date]

            res = pd.concat([res, column], axis=1)

        # drop dates without any value
        res = res.T.dropna(how='all').T
        self.save_alpha_factor_exposure(res, self.raw_factor_name)
    def cal_factor_exposure(self, beg_date, end_date):

        """
        Compute and save the raw factor exposure.

        For each trade date the value is minus the ratio of the 20-day to the
        60-day standard deviation of "FF3_ResidualReturn" (divided by 100 on
        load). Codes with 48 or fewer valid observations in the long window are
        set to NaN; long windows with 48 or fewer rows yield an empty column.
        """

        # param
        long_window = 60
        short_window = 20
        min_valid = int(long_window * 0.8)

        # read data (rows: codes, columns: dates)
        ff3_residual = FamaFrench().get_data("model_ff3", "FF3_ResidualReturn") / 100.0

        # calculate data daily
        trade_dates = Date().get_trade_date_series(beg_date, end_date)
        trade_dates = sorted(set(ff3_residual.columns) & set(trade_dates))
        res = pd.DataFrame()

        # NOTE(review): the original kept FamaFrench().cal_all_factor_pct() and
        # FamaFrench().ff3_model(beg_date, end_date) commented out here -
        # presumably the model output must be refreshed beforehand; confirm.

        for current_date in trade_dates:

            long_start = Date().get_trade_date_offset(current_date, -(long_window - 1))
            short_start = Date().get_trade_date_offset(current_date, -(short_window - 1))

            # rows become dates; drop dates with no data at all
            long_sample = ff3_residual.loc[:, long_start:current_date]
            short_sample = ff3_residual.loc[:, short_start:current_date]
            long_sample = long_sample.T.dropna(how='all')
            short_sample = short_sample.T.dropna(how='all')

            if len(long_sample) <= min_valid:
                print('Calculating factor %s at date %s is null' % (self.raw_factor_name, current_date))
                column = pd.DataFrame([], columns=[current_date], index=ff3_residual.index)
            else:
                print('Calculating factor %s at date %s' % (self.raw_factor_name, current_date))
                vol_ratio = - short_sample.std() / long_sample.std()
                # not enough valid observations -> no factor value
                valid_count = long_sample.count()
                vol_ratio[valid_count <= min_valid] = np.nan
                column = pd.DataFrame(vol_ratio)
                column.columns = [current_date]

            res = pd.concat([res, column], axis=1)

        # drop dates without any value
        res = res.T.dropna(how='all').T
        self.save_alpha_factor_exposure(res, self.raw_factor_name)
    def cal_factor_exposure(self, beg_date, end_date):

        """
        Compute and save the raw factor exposure.

        For each trade date with a full 120-row window of "TradeAmount"
        (divided by 1e8 on load), the factor is the dot product of
        ``-log(amount)`` with linearly increasing weights 1..120, so later
        rows weigh more. Only codes with fewer than 60 zero-or-missing days
        in the window are evaluated.
        """

        def neg_log_or_nan(value):
            # zero amounts have no logarithm; mark them missing
            return np.nan if value == 0 else -np.log(value)

        # params
        window = 120
        max_zero_days = int(window * 0.5)

        # read data (rows: dates, columns: codes after the transpose)
        trade_amount = Stock().read_factor_h5("TradeAmount").T / 100000000
        trade_amount = trade_amount.dropna(how='all')

        # calculate data daily
        trade_dates = Date().get_trade_date_series(beg_date, end_date)
        trade_dates = sorted(set(trade_amount.index) & set(trade_dates))
        res = pd.DataFrame([])

        for current_date in trade_dates:

            window_start = Date().get_trade_date_offset(current_date, -(window - 1))
            filled = trade_amount.loc[window_start:current_date, :].fillna(0.0)

            if len(filled) != window:
                print('Calculating factor %s at date %s is null' % (self.raw_factor_name, current_date))
                column = pd.DataFrame([], columns=[current_date], index=trade_amount.columns)
            else:
                print('Calculating factor %s at date %s' % (self.raw_factor_name, current_date))

                # count zero (or originally missing) days per code
                zero_days = (filled == 0.0).sum()
                active_codes = zero_days[zero_days < max_zero_days].index

                # Use the unfilled data; a NaN anywhere in a code's window
                # propagates through the dot product to a NaN factor value.
                amounts = trade_amount.loc[window_start:current_date, active_codes]
                log_amounts = amounts.applymap(neg_log_or_nan)

                day_weights = np.arange(1, window + 1)
                weighted = np.dot(log_amounts.T.values, day_weights)
                column = pd.DataFrame(weighted, index=log_amounts.columns, columns=[current_date])

            res = pd.concat([res, column], axis=1)

        # drop dates without any value
        res = res.T.dropna(how='all').T
        self.save_alpha_factor_exposure(res, self.raw_factor_name)
Пример #28
0
    def cal_factor_exposure(self, beg_date, end_date):
        """
        Compute and save the raw factor exposure.

        For each trade date the value is minus the skewness of "Pct_chg" over
        a 150-day window. Codes with 120 or fewer valid observations are set to
        NaN; windows with 120 or fewer rows yield an empty column.

        Args:
            beg_date: first date of the calculation range.
            end_date: last date of the calculation range.
        """

        # param
        term = 150
        effective_term = 120

        # read data (rows: codes, columns: dates)
        pct = Stock().read_factor_h5("Pct_chg")

        # calculate data daily
        date_series = Date().get_trade_date_series(beg_date, end_date)
        date_series = sorted(set(pct.columns) & set(date_series))
        res = pd.DataFrame()

        for current_date in date_series:

            data_beg_date = Date().get_trade_date_offset(
                current_date, -(term - 1))
            # `.ix` was removed in pandas 1.0; this is a label slice on the
            # date columns, which `.loc` expresses directly.
            pct_before = pct.loc[:, data_beg_date:current_date]
            pct_stock = pct_before.T.dropna(how='all')

            if len(pct_stock) > effective_term:
                print('Calculating factor %s at date %s' %
                      (self.raw_factor_name, current_date))
                skew_date = -pct_stock.skew()
                # not enough valid observations -> no factor value
                effective_number = pct_stock.count()
                skew_date[effective_number <= effective_term] = np.nan
                skew_date = pd.DataFrame(skew_date.values,
                                         columns=[current_date],
                                         index=skew_date.index)
            else:
                print('Calculating factor %s at date %s is null' %
                      (self.raw_factor_name, current_date))
                skew_date = pd.DataFrame([],
                                         columns=[current_date],
                                         index=pct.index)

            res = pd.concat([res, skew_date], axis=1)

        # drop dates without any value
        res = res.T.dropna(how='all').T
        self.save_alpha_factor_exposure(res, self.raw_factor_name)
Пример #29
0
    def cal_factor_exposure(self, beg_date, end_date):
        """
        Compute and save the raw factor exposure.

        For each trade date with a full 60-row window of "Pct_chg" (percent
        values, divided by 100), the factor is minus the compounded return over
        the window. Codes with fewer than 30 valid observations are set to NaN.
        Dates without a full window contribute no column.
        """

        # param
        window = 60
        min_valid = 30

        # read data (rows: dates, columns: codes after the transpose)
        pct = Stock().read_factor_h5("Pct_chg").T

        # calculate data daily
        trade_dates = Date().get_trade_date_series(beg_date, end_date)
        trade_dates = sorted(set(trade_dates) & set(pct.index))

        res = pd.DataFrame([])

        for current_date in trade_dates:

            window_start = Date().get_trade_date_offset(
                current_date, -(window - 1))
            returns = pct.loc[window_start:current_date, :]
            returns = returns.dropna(how='all')
            returns = returns / 100.0

            if len(returns) != window:
                print('Calculating factor %s at date %s is null' %
                      (self.raw_factor_name, current_date))
                continue

            print('Calculating factor %s at date %s' %
                  (self.raw_factor_name, current_date))

            # compounded return up to the last row of the window
            compounded = (returns + 1.0).cumprod() - 1.0
            momentum = compounded.loc[current_date, :]

            # not enough valid observations -> no factor value
            has_enough = returns.count() >= min_valid
            momentum[~has_enough] = np.nan

            column = -pd.DataFrame(momentum)
            column.columns = [current_date]
            res = pd.concat([res, column], axis=1)

        # drop dates without any value
        res = res.T.dropna(how='all').T
        self.save_alpha_factor_exposure(res, self.raw_factor_name)
Пример #30
0
    def cal_weight_at_all_daily(self):
        """
        Compute the portfolio's stock weights on every trade date.

        On dates that are columns of ``self.port_hold`` the stored weights are
        copied. On any other date the previous trade date's weights are drifted
        by that day's asset return (in percent) and renormalized to sum to one.
        A "Cash" row gets a 0.0 return and assets without a return get the
        median return of the held assets. The daily weights are saved to
        ``<port_name>_PortHoldDaily.csv`` under
        ``self.save_path/self.port_name``.
        """

        self.get_weight_at_all_change_date()

        first_date = self.port_hold.columns[0]
        today = datetime.today().strftime("%Y%m%d")
        trade_dates = Date().get_trade_date_series(first_date, today)
        trade_dates = sorted(set(trade_dates) & set(self.asset_return.columns))
        rebalance_dates = list(self.port_hold.columns)

        for date in trade_dates:

            date_before = Date().get_trade_date_offset(date, -1)

            if date in rebalance_dates:
                # rebalance date: take the stored target weights as they are
                self.port_hold_daily[date] = self.port_hold[date]
                print(" Calculating Weight of Portfolio %s At Date %s " %
                      (self.port_name, date))
                continue

            print(" Calculating Weight of Portfolio %s At Date %s " %
                  (self.port_name, date))

            drift = pd.concat(
                [self.port_hold_daily[date_before], self.asset_return[date]],
                axis=1)
            drift.columns = ["WeightBefore", "PctCur"]

            # cash earns nothing
            if "Cash" in drift.index:
                drift.loc['Cash', "PctCur"] = 0.0

            # keep held assets only; fill missing returns with the median
            drift = drift.dropna(subset=["WeightBefore"])
            median_pct = drift["PctCur"].median()
            drift["PctCur"] = drift["PctCur"].fillna(median_pct)

            # grow each weight by its return, then renormalize
            growth = 1.0 + drift["PctCur"] / 100.0
            drift["Weight"] = drift["WeightBefore"] * growth
            drift["Weight"] = drift["Weight"] / drift["Weight"].sum()
            self.port_hold_daily[date] = drift["Weight"]

        sub_path = os.path.join(self.save_path, self.port_name)
        out_file = os.path.join(sub_path, self.port_name + '_PortHoldDaily.csv')
        self.port_hold_daily.to_csv(out_file)