def _create_model():
    """Return a GroupedOOFModel built on a LogExpModel-wrapped CatBoost regressor.

    The CatBoost regressor is created with ``verbose=False``. Rows are
    grouped by the ``'ticker'`` column and the number of folds is taken
    from the module-level ``FOLD_CNT``.
    """
    regressor = ctb.CatBoostRegressor(verbose=False)
    wrapped_regressor = LogExpModel(regressor)
    return GroupedOOFModel(
        base_model=wrapped_regressor,
        group_column='ticker',
        fold_cnt=FOLD_CNT,
    )
def _create_model():
    """Return a GroupedOOFModel wrapping a LightGBM + CatBoost ensemble.

    The EnsembleModel receives one LGBMRegressor and one CatBoostRegressor
    (``verbose=False``) as base models, together with the module-level
    ``BAGGING_FRACTION`` and ``MODEL_CNT`` settings. The ensemble is then
    wrapped in a GroupedOOFModel grouped by ``'ticker'`` with ``FOLD_CNT``
    folds.
    """
    ensemble = EnsembleModel(
        base_models=[
            lgbm.sklearn.LGBMRegressor(),
            ctb.CatBoostRegressor(verbose=False),
        ],
        bagging_fraction=BAGGING_FRACTION,
        model_cnt=MODEL_CNT,
    )
    return GroupedOOFModel(ensemble, group_column='ticker', fold_cnt=FOLD_CNT)
def _create_base_components(self):
    """Create the feature, target and model shared by the pipeline tests.

    Returns:
        A 3-tuple ``(feature, target, model)``:
        a QuarterlyFeatures over six quarterly columns
        (``quarter_counts=[2, 10]``, ``max_back_quarter=1``),
        a QuarterlyTarget on ``'marketcap'`` with ``quarter_shift=0``,
        and a GroupedOOFModel around an LGBMRegressor grouped by
        ``'ticker'`` with 4 folds.
    """
    feature_columns = ['revenue', 'netinc', 'ncf', 'ebitda', 'debt', 'fcf']
    quarterly_features = QuarterlyFeatures(
        columns=feature_columns,
        quarter_counts=[2, 10],
        max_back_quarter=1,
    )
    marketcap_target = QuarterlyTarget(col='marketcap', quarter_shift=0)
    oof_model = GroupedOOFModel(
        lgbm.sklearn.LGBMRegressor(),
        group_column='ticker',
        fold_cnt=4,
    )
    return quarterly_features, marketcap_target, oof_model
def test_fit_execute_multi_target_model(self, data_loader):
    """Fit and execute a BasePipeline that has one model per target.

    The pipeline is given two targets and two models (an LGBM-based
    GroupedOOFModel from ``_create_base_components`` and a CatBoost-based
    one built here). The test checks that ``fit`` returns a dict with a
    positive metric for each output, that ``execute`` returns a DataFrame
    with positive mean predictions in ``y_0`` and ``y_1``, and that the
    two prediction columns are not identical.

    Args:
        data_loader: loader passed straight through to ``pipeline.fit``
            and ``pipeline.execute``.
    """
    f1, target, model = self._create_base_components()
    model1 = GroupedOOFModel(ctb.CatBoostRegressor(verbose=False),
                             group_column='ticker', fold_cnt=4)

    # The same target object is supplied for both outputs, so a difference
    # between y_0 and y_1 can only come from the two different models.
    pipeline = BasePipeline(feature=f1,
                            target=[target, target],
                            model=[model, model1],
                            metric=median_absolute_relative_error,
                            out_name=None)

    res = pipeline.fit(data_loader, tickers)
    assert isinstance(res, dict)
    assert res['metric_y_0'] > 0
    assert res['metric_y_1'] > 0

    df = pipeline.execute(data_loader, tickers)
    assert isinstance(df, pd.DataFrame)
    assert df['y_0'].mean() > 0
    assert df['y_1'].mean() > 0
    # At least one row must differ between the two models' predictions.
    assert not (df['y_0'] == df['y_1']).all()
# Third feature calculator: quarterly diff features over QUARTER_COLUMNS.
fc3 = QuarterlyDiffFeatures(
    columns=QUARTER_COLUMNS,
    compare_quarter_idxs=COMPARE_QUARTER_IDXS,
    max_back_quarter=MAX_BACK_QUARTER,
)

# Merge fc1 with fc2 on 'ticker', then merge that result with fc3 on
# ('ticker', 'date').
feature = FeatureMerger(
    FeatureMerger(fc1, fc2, on='ticker'),
    fc3,
    on=['ticker', 'date'],
)

target = QuarterlyDiffTarget(col='marketcap')

# LightGBM + CatBoost ensemble, wrapped in a GroupedOOFModel grouped by ticker.
base_models = [
    lgbm.sklearn.LGBMRegressor(),
    ctb.CatBoostRegressor(verbose=False),
]
ensemble = EnsembleModel(
    base_models=base_models,
    bagging_fraction=BAGGING_FRACTION,
    model_cnt=MODEL_CNT,
)
model = GroupedOOFModel(ensemble, group_column='ticker', fold_cnt=FOLD_CNT)

pipeline = BasePipeline(
    feature=feature,
    target=target,
    model=model,
    metric=median_absolute_relative_error,
    out_name=OUT_NAME,
)

# Fit on the given tickers, print the fit result, then export the pipeline
# core to SAVE_PATH.
result = pipeline.fit(data_loader, ticker_list)
print(result)
pipeline.export_core(SAVE_PATH)
# test_fit_predict: exercises GroupedOOFModel.fit / predict on data from
# gen_grouped_data(1000).
#
# Part 1 - GroupedOOFModel(GroupTestModel(), group_column='ticker', fold_cnt=5)
# is fitted on two views of the same frame: X_ as returned, and X_ with
# ('ticker', 'date') moved into the index. For each view the test asserts:
#   * predict returns one value per input row;
#   * model.group_df has 20 rows and 5 distinct fold_id values;
#   * after joining rows to model.group_df ('ticker' renamed to 'group'),
#     no prediction equals its own row's y;
#   * per fold_id, the set of unique y values and the set of unique
#     predictions do not intersect.
# When 'ticker' is an ordinary column, it is overwritten with 100500 and the
# new predictions are asserted to share no value with the y values of the
# folds_df row labelled 0.
#
# Part 2 - a GroupedOOFModel around lgbm.sklearn.LGBMClassifier is fitted on
# the boolean target y['y'] > 5 and its predictions are asserted to lie in
# [0, 1].
#
# NOTE(review): line breaks and indentation of this block are lost in this
# view, so the exact extent of the for-loop body and of the
# `if 'ticker' in X.columns` branch cannot be determined here - confirm
# against the formatted file.
# NOTE(review): part 2 rebinds X_ and y from a fresh gen_grouped_data(1000)
# call but then fits and predicts on X (the loop variable), not X_ - confirm
# this is intended.
def test_fit_predict(self): X_, y = gen_grouped_data(1000) model = GroupedOOFModel(GroupTestModel(), group_column='ticker', fold_cnt=5) for X in [X_, X_.set_index(['ticker', 'date'])]: model.fit(X, y['y']) pred = model.predict(X) assert len(X) == len(pred) assert len(model.group_df) == 20 assert len(model.group_df['fold_id'].unique()) == 5 info = X.copy() info['y'] = y['y'] info['pred'] = pred info = info.reset_index() info = pd.merge(info.rename({'ticker': 'group'}, axis=1), model.group_df, on='group', how='left') assert (info['y'] != info['pred']).min() folds_df = pd.merge(info.groupby('fold_id')['y'].unique(), info.groupby('fold_id')['pred'].unique(), on='fold_id', how='left') assert folds_df.apply(lambda x: len(set(x['y']) \ .intersection(set(x['pred']))) == 0, axis=1).min() if 'ticker' in X.columns: X['ticker'] = 100500 pred = model.predict(X) assert len(set(pred).intersection(set( folds_df.loc[0]['y']))) == 0 X_, y = gen_grouped_data(1000) model = GroupedOOFModel(lgbm.sklearn.LGBMClassifier(), group_column='ticker', fold_cnt=5) model.fit(X, y['y'] > 5) pred = model.predict(X) assert (pred >= 0).min() assert (pred <= 1).min()