def predict_by_model(ref_date: str, alpha_model: ModelBase, data_meta: DataMeta = None, x_values: pd.DataFrame = None, codes: Iterable[int] = None):
    """Score a cross-section of securities with *alpha_model*.

    If ``x_values`` is not supplied, the feature matrix and the matching
    security codes are fetched from ``data_meta`` for ``ref_date``
    (``data_meta`` must then be provided; ``codes`` is taken from the
    fetched data).  When ``x_values`` is supplied, the caller is expected
    to pass ``codes`` as well — TODO confirm with callers.

    Returns a ``(scores, x_values)`` pair where ``scores`` is a
    one-column DataFrame of flattened predictions indexed by code.
    """
    if x_values is None:
        # No features supplied: pull both the feature matrix and the
        # code index from the data-meta layer for this date.
        fetched = data_meta.fetch_predict_data(ref_date, alpha_model)
        section = fetched['predict']
        codes = section['code']
        x_values = section['x']
    scores = alpha_model.predict(x_values).flatten()
    return pd.DataFrame(scores, index=codes), x_values
def train_model(ref_date: str, alpha_model: ModelBase, data_meta: DataMeta = None, x_values: pd.DataFrame = None, y_values: pd.DataFrame = None):
    """Fit a deep copy of *alpha_model* and return it with its training data.

    ``ConstLinearModel`` instances carry fixed weights and are returned
    untrained.  For every other model, training data is either taken from
    the supplied ``x_values``/``y_values`` or fetched from ``data_meta``
    for ``ref_date`` when ``x_values`` is ``None``.

    Returns a ``(fitted_model, x_values, y_values)`` triple; the input
    ``alpha_model`` is never mutated.
    """
    # Work on a copy so the caller's model object stays untouched.
    fitted = copy.deepcopy(alpha_model)
    if isinstance(alpha_model, ConstLinearModel):
        # Constant-weight model: nothing to fit.
        return fitted, x_values, y_values
    if x_values is None:
        # Fetch the training panel for this date from the data-meta layer.
        train_data = data_meta.fetch_train_data(ref_date, alpha_model)
        x_values = train_data['train']['x']
        y_values = train_data['train']['y']
    fitted.fit(x_values, y_values)
    return fitted, x_values, y_values
# Set up ROE-based auto-regressive features and two linear-regression
# composers over 2014-2016.  NOTE(review): 'kernal' is presumably a typo
# for 'kernel', kept as-is since it is a live variable name.
fit_intercept = True
kernal_feature = 'roe_q'
# Regression features: current quarterly ROE plus three quarterly lags.
regress_features = {kernal_feature: LAST(kernal_feature),
                    kernal_feature + '_l1': SHIFT(kernal_feature, 1),
                    kernal_feature + '_l2': SHIFT(kernal_feature, 2),
                    kernal_feature + '_l3': SHIFT(kernal_feature, 3)
                    }
# Constant model uses only the current-quarter value.
const_features = {kernal_feature: LAST(kernal_feature)}
fit_target = [kernal_feature]

data_meta = DataMeta(freq=freq,
                     universe=universe,
                     batch=batch,
                     neutralized_risk=neutralized_risk,
                     risk_model=risk_model,
                     pre_process=pre_process,
                     post_process=post_process,
                     warm_start=warm_start,
                     data_source=data_source)

# NOTE(review): this first model hard-codes fit_intercept=True instead of
# using the fit_intercept flag defined above (the second model below does
# use the flag) — confirm whether that asymmetry is intentional.
alpha_model = LinearRegression(features=regress_features, fit_intercept=True, fit_target=fit_target)
composer = Composer(alpha_model=alpha_model, data_meta=data_meta)

start_date = '2014-01-01'
end_date = '2016-01-01'

regression_model = LinearRegression(features=regress_features, fit_intercept=fit_intercept, fit_target=fit_target)
regression_composer = Composer(alpha_model=regression_model, data_meta=data_meta)

# NOTE(review): this call is truncated in the visible chunk — its
# remaining arguments continue beyond this excerpt.
data_package1 = fetch_data_package(engine,
}  # closes the alpha_factors dict started above this excerpt
# Constant-weight model: only 'f1' contributes; 'f2' is zero-weighted.
weights = {'f1': 1., 'f2': 0.}

# Alternative trainer kept for reference (disabled):
# alpha_model = XGBTrainer(objective='reg:linear',
#                          booster='gbtree',
#                          n_estimators=300,
#                          eval_sample=0.25,
#                          features=alpha_factors)

alpha_model = ConstLinearModel(features=alpha_factors, weights=weights)
data_meta = DataMeta(freq=freq,
                     universe=universe,
                     batch=1,
                     neutralized_risk=neutralized_risk,
                     pre_process=None,
                     post_process=None,
                     warm_start=1)

# Risk-exposure bounds: total, benchmark, plus every adjusted SW level-1
# industry.
industries = industry_list('sw_adj', 1)
total_risk_names = ['total', 'benchmark'] + industries

b_type = []
l_val = []
u_val = []

# NOTE(review): this loop is truncated in the visible chunk — branches
# for the non-'total' names continue beyond this excerpt.
for name in total_risk_names:
    if name == 'total':
        b_type.append(BoundaryType.ABSOLUTE)
}  # closes the alpha_factors dict started above this excerpt
# Single-factor constant model: full weight on the one factor.
weights = {str(factor): 1.}

# Alternative trainer kept for reference (disabled):
# alpha_model = XGBTrainer(objective='reg:linear',
#                          booster='gbtree',
#                          n_estimators=300,
#                          eval_sample=0.25,
#                          features=alpha_factors)

alpha_model = ConstLinearModel(features=alpha_factors, weights=weights)
data_meta = DataMeta(
    freq=freq,
    universe=universe,
    batch=32,
    neutralized_risk=None,  # industry_styles,
    pre_process=None,  # [winsorize_normal, standardize],
    post_process=None,
    warm_start=12)  # [standardize])

industries = industry_list('sw_adj', 1)
# Only the total-risk bound is configured in this variant.
total_risk_names = ['total']

b_type = []
l_val = []
u_val = []

# NOTE(review): this loop is truncated in the visible chunk — branches
# for the non-'total' names continue beyond this excerpt.
for name in total_risk_names:
    if name == 'total':
        b_type.append(BoundaryType.ABSOLUTE)
# Backtest setup: a single cross-sectional-quantile EP factor with unit
# weight, run every 10 business days over early 2011.
start_date = '2011-01-01'
end_date = '2011-05-04'
freq = '10b'
neutralized_risk = None

alpha_factors = {'ep_q_cs': CSQuantiles(LAST('ep_q'), groups='sw1_adj')}
weights = dict(ep_q_cs=1.)

alpha_model = ConstLinearModel(features=alpha_factors, weights=weights)
data_meta = DataMeta(freq=freq,
                     universe=universe,
                     batch=1,
                     neutralized_risk=None,
                     pre_process=None,
                     post_process=None,
                     # DB connection string comes from the environment;
                     # raises KeyError if DB_URI is unset.
                     data_source=os.environ['DB_URI'])

strategy = Strategy(alpha_model,
                    data_meta,
                    universe=universe,
                    start_date=start_date,
                    end_date=end_date,
                    freq=freq,
                    benchmark=benchmark_code)
# Pre-fetch all data needed for the backtest window.
strategy.prepare_backtest_data()


# NOTE(review): this definition is truncated in the visible chunk — its
# remaining parameters and body continue beyond this excerpt.
def create_scenario(weights_bandwidth=0.02,