def _create_feature():
    """Assemble the merged feature calculator.

    Builds four calculators (quarterly, base-company, quarterly-diff and
    commodity daily-aggregate) and chains them through ``FeatureMerger``.

    Returns:
        The outermost ``FeatureMerger`` wrapping all four calculators.
    """
    quarterly = QuarterlyFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        quarter_counts=QUARTER_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
    )
    base = BaseCompanyFeatures(data_key='base', cat_columns=CAT_COLUMNS)
    quarterly_diff = QuarterlyDiffFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        compare_quarter_idxs=COMPARE_QUARTER_IDXS,
        max_back_quarter=MAX_BACK_QUARTER,
    )
    commodities = DailyAggQuarterFeatures(
        daily_data_key='commodities',
        quarterly_data_key='quarterly',
        columns=['price'],
        agg_day_counts=AGG_DAY_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
        daily_index=COMMODITIES_CODES,
    )

    # The base calculator is merged on ticker only; every later calculator
    # is merged on the (ticker, date) pair.
    merged = FeatureMerger(quarterly, base, on='ticker')
    for extra in (quarterly_diff, commodities):
        merged = FeatureMerger(merged, extra, on=['ticker', 'date'])
    return merged
def _create_feature():
    """Assemble the merged feature calculator.

    Builds four calculators (quarterly, base-company, daily-aggregate and
    commodity daily-aggregate) and chains them through ``FeatureMerger``.

    Returns:
        The outermost ``FeatureMerger`` wrapping all four calculators.
    """
    quarterly = QuarterlyFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        quarter_counts=QUARTER_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
    )
    base = BaseCompanyFeatures(data_key='base', cat_columns=CAT_COLUMNS)

    # Daily aggregates on marketcap and pe are possible here because they
    # are normalized, so there is no leakage.
    daily_agg = DailyAggQuarterFeatures(
        daily_data_key='daily',
        quarterly_data_key='quarterly',
        columns=DAILY_AGG_COLUMNS,
        agg_day_counts=AGG_DAY_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
    )
    commodities = DailyAggQuarterFeatures(
        daily_data_key='commodities',
        quarterly_data_key='quarterly',
        columns=['price'],
        agg_day_counts=AGG_DAY_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
        daily_index=COMMODITIES_CODES,
    )

    # The base calculator is merged on ticker only; every later calculator
    # is merged on the (ticker, date) pair.
    merged = FeatureMerger(quarterly, base, on='ticker')
    for extra in (daily_agg, commodities):
        merged = FeatureMerger(merged, extra, on=['ticker', 'date'])
    return merged
def _create_feature():
    """Assemble the merged feature calculator.

    Builds quarterly, base-company and quarterly-diff calculators and
    chains them through ``FeatureMerger``.

    Returns:
        The outermost ``FeatureMerger`` wrapping all three calculators.
    """
    quarterly = QuarterlyFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        quarter_counts=QUARTER_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
    )
    base = BaseCompanyFeatures(data_key='base', cat_columns=CAT_COLUMNS)
    quarterly_diff = QuarterlyDiffFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        compare_quarter_idxs=COMPARE_QUARTER_IDXS,
        max_back_quarter=MAX_BACK_QUARTER,
    )

    # First merge is keyed on ticker only; the second on (ticker, date).
    quarterly_with_base = FeatureMerger(quarterly, base, on='ticker')
    return FeatureMerger(
        quarterly_with_base,
        quarterly_diff,
        on=['ticker', 'date'],
    )
def _create_feature():
    """Assemble the merged feature calculator.

    Pairs a quarterly calculator (``max_back_quarter=1``) with a
    base-company calculator, merged on ticker.

    Returns:
        The ``FeatureMerger`` combining both calculators.
    """
    quarterly = QuarterlyFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        quarter_counts=QUARTER_COUNTS,
        max_back_quarter=1,
    )
    base = BaseCompanyFeatures(data_key='base', cat_columns=CAT_COLUMNS)
    return FeatureMerger(quarterly, base, on='ticker')
def test_calculate(self, data, tickers):
    """Check that ``FeatureMerger`` keeps the left-hand features intact.

    Merges a quarterly calculator with a quarterly-diff calculator (on
    ticker and date) and with a base-company calculator (on ticker), then
    verifies row count, column count, index and the leading columns
    against the standalone quarterly result.
    """
    quarterly = QuarterlyFeatures(
        data_key='quarterly',
        columns=['ebit'],
        quarter_counts=[2],
        max_back_quarter=10,
    )
    quarterly_diff = QuarterlyDiffFeatures(
        data_key='quarterly',
        columns=['ebit', 'debt'],
        compare_quarter_idxs=[1, 4],
        max_back_quarter=10,
    )
    base = BaseCompanyFeatures(
        data_key='base',
        cat_columns=['sector', 'sicindustry'],
    )

    left_frame = quarterly.calculate(data, tickers)
    diff_frame = quarterly_diff.calculate(data, tickers)
    base_frame = base.calculate(data, tickers)

    date_merger = FeatureMerger(quarterly, quarterly_diff,
                                on=['ticker', 'date'])
    merged_on_date = date_merger.calculate(data, tickers)

    ticker_merger = FeatureMerger(quarterly, base, on='ticker')
    merged_on_ticker = ticker_merger.calculate(data, tickers)

    # (merged result, frame of the right-hand calculator) for each case.
    cases = (
        (merged_on_date, diff_frame),
        (merged_on_ticker, base_frame),
    )
    left_rows, left_width = left_frame.shape

    # Merging must not add or drop rows.
    for merged, _ in cases:
        assert merged.shape[0] == left_rows

    # Column count is the sum of both inputs.
    for merged, right_frame in cases:
        assert merged.shape[1] == left_width + right_frame.shape[1]

    # The index is carried over from the left-hand frame.
    for merged, _ in cases:
        assert (merged.index == left_frame.index).min()

    # The leading columns hold the left-hand values unchanged.
    for merged, _ in cases:
        leading_cols = merged.columns[:left_width]
        for merged_col, left_col in zip(leading_cols, left_frame.columns):
            assert (merged[merged_col] == left_frame[left_col]).min()
scalemarketcap=SCALE_MARKETCAP) ticker_list = tickers_df['ticker'].unique().tolist() fc1 = QuarterlyFeatures( columns=QUARTER_COLUMNS, quarter_counts=QUARTER_COUNTS, max_back_quarter=MAX_BACK_QUARTER) fc2 = BaseCompanyFeatures(cat_columns=CAT_COLUMNS) fc3 = QuarterlyDiffFeatures( columns=QUARTER_COLUMNS, compare_quarter_idxs=COMPARE_QUARTER_IDXS, max_back_quarter=MAX_BACK_QUARTER) feature = FeatureMerger(fc1, fc2, on='ticker') feature = FeatureMerger(feature, fc3, on=['ticker', 'date']) target = QuarterlyDiffTarget(col='marketcap') base_models = [lgbm.sklearn.LGBMRegressor(), ctb.CatBoostRegressor(verbose=False)] ensemble = EnsembleModel(base_models=base_models, bagging_fraction=BAGGING_FRACTION, model_cnt=MODEL_CNT) model = GroupedOOFModel(ensemble, group_column='ticker', fold_cnt=FOLD_CNT)