def _calculate_pos(self, running_setting, er, data, constraints, benchmark_w,
                   lbound, ubound, risk_model, current_position):
    """Compute the target portfolio position via optimization.

    First attempts an optimization that honors the current position and
    the turnover target; if the constraints cannot be satisfied, falls
    back to a full re-balance (no current position / turnover limit).
    """
    more_opts = running_setting.more_opts
    # Keyword arguments shared by the constrained run and the fallback run.
    shared_kwargs = dict(
        industry=data.industry_name.values,
        dx_return=None,
        constraints=constraints,
        detail_analysis=False,
        benchmark=benchmark_w,
        method=running_setting.rebalance_method,
        lbound=lbound,
        ubound=ubound,
        target_vol=more_opts.get('target_vol'),
        risk_model=risk_model,
    )
    try:
        target_pos, _ = er_portfolio_analysis(
            er,
            current_position=current_position,
            turn_over_target=more_opts.get('turn_over_target'),
            **shared_kwargs)
    except PortfolioBuilderException:
        alpha_logger.warning(
            "Not able to fit the constraints. Using full re-balance.")
        target_pos, _ = er_portfolio_analysis(er, **shared_kwargs)
    return target_pos
def data_info_log(df, table):
    """Log how many rows of *df* will be inserted into *table*.

    Raises:
        ValueError: if *df* is empty (after logging a warning).
    """
    data_len = len(df)
    # Guard clause: an empty frame is an error, not a silent no-op.
    if not data_len:
        msg = "No records will be inserted in {0}".format(table)
        alpha_logger.warning(msg)
        raise ValueError(msg)
    alpha_logger.info(
        "{0} records will be inserted in {1}".format(data_len, table))
def load(cls, model_desc: dict):
    """Restore a model from *model_desc*, warning when the installed
    sklearn is older than the version the model was saved with."""
    obj_layout = super().load(model_desc)
    saved_version = model_desc['sklearn_version']
    if LooseVersion(sklearn_version) < LooseVersion(saved_version):
        alpha_logger.warning(
            'Current sklearn version {0} is lower than the model version {1}. '
            'Loaded model may work incorrectly.'.format(
                sklearn_version, saved_version))
    return obj_layout
def load(cls, model_desc: dict):
    """Rebuild a model object from its serialized description dict."""
    obj_layout = cls()
    obj_layout.features = model_desc['features']
    obj_layout.trained_time = model_desc['trained_time']
    saved_version = model_desc['sklearn_version']
    # Deserializing a model trained with a newer sklearn may misbehave.
    if LooseVersion(sklearn_version) < LooseVersion(saved_version):
        alpha_logger.warning(
            'Current sklearn version {0} is lower than the model version {1}. '
            'Loaded model may work incorrectly.'.format(
                sklearn_version, saved_version))
    obj_layout.impl = decode(model_desc['desc'])
    return obj_layout
def cs_impl(ref_date, factor_data, factor_name, risk_exposure,
            constraint_risk, industry_matrix, dx_returns):
    """Cross-sectional evaluation of one factor on one date.

    Merges factor values with risk exposures and industry labels,
    neutralizes the factor against the exposures, builds weights
    proportional to the processed score, and evaluates them against
    realized returns.

    Returns:
        (port_ret, ic, t_stats): portfolio log return, information
        coefficient, and the OLS t-statistic of the weight coefficient;
        (nan, nan, nan) when too few valid rows survive the merge.
    """
    total_data = pd.merge(factor_data, risk_exposure, on='code')
    total_data = pd.merge(total_data, industry_matrix, on='code')
    total_data = total_data.replace([np.inf, -np.inf], np.nan).dropna()
    # Skip the run when the cleaned sample keeps less than a third of the
    # original cross-section — results would not be meaningful.
    if len(total_data) < 0.33 * len(factor_data):
        alpha_logger.warning(
            f"valid data point({len(total_data)}) "
            f"is less than 33% of the total sample ({len(factor_data)}). Omit this run"
        )
        return np.nan, np.nan, np.nan
    total_risk_exp = total_data[constraint_risk]
    # Winsorize + standardize the raw factor, neutralize it against the
    # risk exposures, then standardize once more.
    er = total_data[[factor_name]].values.astype(float)
    er = factor_processing(er, [winsorize_normal, standardize],
                           total_risk_exp.values, [standardize]).flatten()
    industry = total_data.industry_name.values
    codes = total_data.code.tolist()
    target_pos = pd.DataFrame({
        'code': codes,
        'weight': er,
        'industry': industry
    })
    # Scale weights so gross (absolute) exposure sums to 1.
    target_pos['weight'] = target_pos['weight'] / target_pos['weight'].abs(
    ).sum()
    target_pos = pd.merge(target_pos, dx_returns, on=['code'])
    target_pos = pd.merge(target_pos, total_data[['code'] + constraint_risk],
                          on=['code'])
    # Re-take exposures after the merges so rows align with target_pos.
    total_risk_exp = target_pos[constraint_risk]
    activate_weight = target_pos['weight'].values
    # 'dx' appears to hold log returns (converted via exp − 1 to simple
    # returns) — TODO confirm against the caller.
    excess_return = np.exp(target_pos[['dx']].values) - 1.
    excess_return = factor_processing(
        excess_return, [winsorize_normal, standardize], total_risk_exp.values,
        [winsorize_normal, standardize]).flatten()
    # Portfolio log return, IC, and the t-stat of the weight coefficient
    # from an OLS of processed returns on weights.
    port_ret = np.log(activate_weight @ excess_return + 1.)
    ic = np.corrcoef(excess_return, activate_weight)[0, 1]
    x = sm.add_constant(activate_weight)
    results = sm.OLS(excess_return, x).fit()
    t_stats = results.tvalues[1]
    alpha_logger.info(
        f"{ref_date} is finished with {len(target_pos)} stocks for {factor_name}"
    )
    alpha_logger.info(
        f"{ref_date} risk_exposure: "
        f"{np.sum(np.square(target_pos.weight.values @ target_pos[constraint_risk].values))}"
    )
    return port_ret, ic, t_stats
def save(self) -> dict:
    """Serialize the model's basic metadata into a plain dict."""
    klass = self.__class__
    # A model defined in __main__ cannot be re-imported by its recorded name.
    if klass.__module__ == '__main__':
        alpha_logger.warning(
            "model is defined in a main module. The model_name may not be correct."
        )
    return dict(
        model_name=klass.__module__ + "." + klass.__name__,
        language='python',
        saved_time=arrow.now().format("YYYY-MM-DD HH:mm:ss"),
        features=list(self.features))
def save(self) -> dict:
    """Serialize the fitted model, its formulas and fit target into a dict."""
    klass = self.__class__
    # A model defined in __main__ cannot be re-imported by its recorded name.
    if klass.__module__ == '__main__':
        alpha_logger.warning(
            "model is defined in a main module. The model_name may not be correct."
        )
    impl_klass = self.impl.__class__
    model_desc = dict(
        model_name=klass.__module__ + "." + klass.__name__,
        language='python',
        saved_time=arrow.now().format("YYYY-MM-DD HH:mm:ss"),
        features=list(self.features),
        trained_time=self.trained_time,
        desc=self.model_encode(),
        formulas=encode(self.formulas),
        fit_target=encode(self.fit_target),
        internal_model=impl_klass.__module__ + "." + impl_klass.__name__)
    return model_desc
def load(cls, model_desc: dict):
    """Restore a model, warning when the installed 3rd-party library is
    older than the version recorded in *model_desc*.

    Raises:
        ValueError: if ``cls._lib_name`` is neither 'sklearn' nor 'xgboost'.
    """
    obj_layout = super().load(model_desc)

    if cls._lib_name == 'sklearn':
        current_version = sklearn_version
    elif cls._lib_name == 'xgboost':
        current_version = xgbboot_version
    else:
        raise ValueError(
            "3rd party lib name ({0}) is not recognized".format(
                cls._lib_name))

    saved_version = model_desc[cls._lib_name + "_version"]
    if LooseVersion(current_version) < LooseVersion(saved_version):
        # Bug fix: the old format args were (sklearn_version,
        # model_desc[cls._lib_name], cls._lib_name) — they always printed
        # the sklearn version even for xgboost, and looked up a key
        # ('sklearn'/'xgboost') that does not exist in model_desc, so the
        # warning path raised KeyError instead of warning.
        alpha_logger.warning(
            'Current {2} version {0} is lower than the model version {1}. '
            'Loaded model may work incorrectly.'.format(
                current_version, saved_version, cls._lib_name))
    return obj_layout
def factor_processing(raw_factors: np.ndarray,
                      pre_process: Optional[List] = None,
                      risk_factors: Optional[np.ndarray] = None,
                      post_process: Optional[List] = None,
                      groups=None) -> np.ndarray:
    """Pipe a factor matrix through pre-processing, neutralization and
    post-processing stages.

    Args:
        raw_factors: factor values, one column per factor.
        pre_process: callables applied in order before neutralization.
        risk_factors: exposure matrix to neutralize against (all-zero
            columns are dropped first); stage is skipped when None.
        post_process: callables applied in order after neutralization.
        groups: optional group labels forwarded to every stage.

    Returns:
        The processed factor matrix.
    """
    processed = raw_factors

    for stage in (pre_process or []):
        processed = stage(processed, groups=groups)

    if risk_factors is not None:
        # Drop degenerate (all-zero) exposure columns before neutralizing.
        active_exposure = risk_factors[:, risk_factors.sum(axis=0) != 0]
        processed = neutralize(active_exposure, processed, groups=groups)

    for stage in (post_process or []):
        if stage.__name__ == 'winsorize_normal':
            alpha_logger.warning(
                "winsorize_normal "
                "normally should not be done after neutralize")
        processed = stage(processed, groups=groups)

    return processed