def _create_model():
    """Build the model used by this pipeline.

    A CatBoost regressor (created with ``verbose=False``) is wrapped in
    ``LogExpModel`` and then in ``GroupedOOFModel`` with ``'ticker'`` as the
    group column and ``FOLD_CNT`` folds.

    Returns:
        The configured ``GroupedOOFModel`` instance.
    """
    return GroupedOOFModel(
        base_model=LogExpModel(ctb.CatBoostRegressor(verbose=False)),
        group_column='ticker',
        fold_cnt=FOLD_CNT,
    )
示例#2
0
def _create_model():
    """Build a grouped out-of-fold model on top of a bagged ensemble.

    The ensemble combines a LightGBM regressor and a CatBoost regressor and
    is configured with ``BAGGING_FRACTION`` and ``MODEL_CNT``. It is then
    wrapped in ``GroupedOOFModel`` with ``'ticker'`` as the group column and
    ``FOLD_CNT`` folds.

    Returns:
        The configured ``GroupedOOFModel`` instance.
    """
    lgbm_regressor = lgbm.sklearn.LGBMRegressor()
    catboost_regressor = ctb.CatBoostRegressor(verbose=False)

    bagged = EnsembleModel(
        base_models=[lgbm_regressor, catboost_regressor],
        bagging_fraction=BAGGING_FRACTION,
        model_cnt=MODEL_CNT,
    )

    return GroupedOOFModel(bagged, group_column='ticker', fold_cnt=FOLD_CNT)
示例#3
0
    def _create_base_components(self):
        """Create the feature, target and model shared by the pipeline tests.

        Returns:
            tuple: ``(feature, target, model)`` where

            * ``feature`` is a ``QuarterlyFeatures`` over six fundamental
              columns with ``quarter_counts=[2, 10]`` and
              ``max_back_quarter=1``;
            * ``target`` is a ``QuarterlyTarget`` on ``'marketcap'`` with
              ``quarter_shift=0``;
            * ``model`` is a ``GroupedOOFModel`` around a LightGBM regressor,
              grouped by ``'ticker'`` with 4 folds.
        """
        feature = QuarterlyFeatures(
            columns=['revenue', 'netinc', 'ncf', 'ebitda', 'debt', 'fcf'],
            quarter_counts=[2, 10],
            max_back_quarter=1,
        )
        marketcap_target = QuarterlyTarget(col='marketcap', quarter_shift=0)
        oof_model = GroupedOOFModel(
            lgbm.sklearn.LGBMRegressor(),
            group_column='ticker',
            fold_cnt=4,
        )

        return feature, marketcap_target, oof_model
示例#4
0
    def test_fit_execute_multi_target_model(self, data_loader):
        """Fit and execute a ``BasePipeline`` with two targets and two models.

        The first target/model pair comes from ``_create_base_components``;
        the second pair is a ``QuarterlyTarget`` on the same column with
        ``quarter_shift=-1`` and a CatBoost-based ``GroupedOOFModel``.

        Checks that ``fit`` returns a dict with a positive metric per target
        and that ``execute`` returns a DataFrame whose two prediction columns
        (``y_0`` and ``y_1``) have positive means and are not identical.

        Args:
            data_loader: data loader passed through to ``pipeline.fit`` and
                ``pipeline.execute`` (supplied by the test harness).
        """
        f1, target, model = self._create_base_components()
        # Second, distinct target so the pipeline is really multi-target.
        # NOTE(review): quarter_shift=-1 presumably selects the previous
        # quarter -- confirm against the QuarterlyTarget documentation.
        target1 = QuarterlyTarget(col='marketcap', quarter_shift=-1)
        model1 = GroupedOOFModel(ctb.CatBoostRegressor(verbose=False),
                                 group_column='ticker',
                                 fold_cnt=4)
        # Previously ``target`` was passed twice and ``target1`` was unused,
        # so both models were trained against the same target.
        pipeline = BasePipeline(feature=f1,
                                target=[target, target1],
                                model=[model, model1],
                                metric=median_absolute_relative_error,
                                out_name=None)

        res = pipeline.fit(data_loader, tickers)
        assert isinstance(res, dict)
        assert res['metric_y_0'] > 0
        assert res['metric_y_1'] > 0

        df = pipeline.execute(data_loader, tickers)
        assert isinstance(df, pd.DataFrame)
        assert df['y_0'].mean() > 0
        assert df['y_1'].mean() > 0
        # The two outputs must differ in at least one row.
        assert not (df['y_0'] == df['y_1']).all()
示例#5
0
    # Third feature calculator: quarterly-diff features over QUARTER_COLUMNS.
    fc3 = QuarterlyDiffFeatures(
        columns=QUARTER_COLUMNS,
        compare_quarter_idxs=COMPARE_QUARTER_IDXS,
        max_back_quarter=MAX_BACK_QUARTER)
                            
    # Combine the feature calculators: fc1 and fc2 are merged on 'ticker',
    # then fc3 is merged in on ['ticker', 'date'].
    # (fc1 and fc2 are defined outside the visible part of this function.)
    feature = FeatureMerger(fc1, fc2, on='ticker')
    feature = FeatureMerger(feature, fc3, on=['ticker', 'date'])

    target = QuarterlyDiffTarget(col='marketcap')

    # Base learners for the ensemble: one LightGBM and one CatBoost regressor.
    base_models = [lgbm.sklearn.LGBMRegressor(),
                   ctb.CatBoostRegressor(verbose=False)]
                   
    ensemble = EnsembleModel(base_models=base_models, 
                             bagging_fraction=BAGGING_FRACTION,
                             model_cnt=MODEL_CNT)

    # Wrap the ensemble so folds are assigned per 'ticker' group.
    model = GroupedOOFModel(ensemble,
                            group_column='ticker',
                            fold_cnt=FOLD_CNT)

    pipeline = BasePipeline(feature=feature, 
                            target=target, 
                            model=model, 
                            metric=median_absolute_relative_error,
                            out_name=OUT_NAME)
                            
    # Fit on the given tickers, print the value returned by fit, then export
    # the pipeline core to SAVE_PATH.
    result = pipeline.fit(data_loader, ticker_list)
    print(result)
    pipeline.export_core(SAVE_PATH) 
示例#6
0
    def test_fit_predict(self):
        """Check fitting and prediction of ``GroupedOOFModel``.

        Part 1 uses ``GroupTestModel`` as the base model and runs the same
        checks for two input layouts: ``'ticker'``/``'date'`` as regular
        columns and as the index. It verifies that

        * one prediction is produced per input row;
        * ``model.group_df`` has 20 rows and 5 distinct ``fold_id`` values;
        * no prediction equals its own target value;
        * within every fold the set of predictions is disjoint from the set
          of target values of that fold;
        * when ``'ticker'`` is a column, overwriting it with ``100500``
          yields predictions disjoint from the target values of fold 0.

        Part 2 fits a LightGBM classifier on freshly generated data and
        checks that all predictions lie in ``[0, 1]``.
        """
        X_, y = gen_grouped_data(1000)
        model = GroupedOOFModel(GroupTestModel(),
                                group_column='ticker',
                                fold_cnt=5)

        for X in [X_, X_.set_index(['ticker', 'date'])]:
            model.fit(X, y['y'])
            pred = model.predict(X)
            assert len(X) == len(pred)
            assert len(model.group_df) == 20
            assert len(model.group_df['fold_id'].unique()) == 5

            # Attach targets, predictions and fold ids to every row.
            info = X.copy()
            info['y'] = y['y']
            info['pred'] = pred
            info = info.reset_index()
            info = pd.merge(info.rename({'ticker': 'group'}, axis=1),
                            model.group_df,
                            on='group',
                            how='left')
            assert (info['y'] != info['pred']).all()

            # Per fold: unique target values next to unique predictions.
            folds_df = pd.merge(info.groupby('fold_id')['y'].unique(),
                                info.groupby('fold_id')['pred'].unique(),
                                on='fold_id',
                                how='left')

            assert folds_df.apply(
                lambda row: set(row['y']).isdisjoint(row['pred']),
                axis=1).all()

            if 'ticker' in X.columns:
                # NOTE(review): 100500 is presumably a ticker absent from the
                # fitted groups -- confirm against gen_grouped_data.
                # Use a modified copy so the caller's frame is not mutated.
                X_other_group = X.assign(ticker=100500)
                pred = model.predict(X_other_group)
                assert set(pred).isdisjoint(folds_df.loc[0]['y'])

        X_, y = gen_grouped_data(1000)
        # Build the features from the freshly generated data. Previously the
        # stale loop variable ``X`` (from the first dataset) was paired with
        # the new ``y``. The indexed layout matches what the original code
        # passed to the classifier.
        X = X_.set_index(['ticker', 'date'])
        model = GroupedOOFModel(lgbm.sklearn.LGBMClassifier(),
                                group_column='ticker',
                                fold_cnt=5)
        model.fit(X, y['y'] > 5)
        pred = model.predict(X)
        assert (pred >= 0).all()
        assert (pred <= 1).all()