Exemplo n.º 1
0
def _create_feature():
    """Build the combined feature calculator used by this pipeline.

    Four calculators are instantiated from the module-level configuration
    constants and chained through ``FeatureMerger``: quarterly features are
    merged with base company features on ``'ticker'``, and the result is
    then merged with the quarterly-diff and commodity daily-aggregate
    features on ``['ticker', 'date']``.

    Returns:
        The outermost ``FeatureMerger`` wrapping all four calculators.
    """
    quarterly = QuarterlyFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        quarter_counts=QUARTER_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
    )
    base_company = BaseCompanyFeatures(
        data_key='base',
        cat_columns=CAT_COLUMNS,
    )
    quarterly_diff = QuarterlyDiffFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        compare_quarter_idxs=COMPARE_QUARTER_IDXS,
        max_back_quarter=MAX_BACK_QUARTER,
    )
    commodities = DailyAggQuarterFeatures(
        daily_data_key='commodities',
        quarterly_data_key='quarterly',
        columns=['price'],
        agg_day_counts=AGG_DAY_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
        daily_index=COMMODITIES_CODES,
    )

    # The base features are joined on the ticker alone; every later
    # calculator is joined on the (ticker, date) pair.
    merged = FeatureMerger(quarterly, base_company, on='ticker')
    for dated_features in (quarterly_diff, commodities):
        merged = FeatureMerger(merged, dated_features, on=['ticker', 'date'])

    return merged
Exemplo n.º 2
0
    def test_calculate(self, data_loader, tickers, columns,
                       compare_quarter_idxs, max_back_quarter):
        """Check the type, index and shape of ``QuarterlyDiffFeatures`` output.

        The result must be a DataFrame indexed by ``ticker`` and ``date``
        with one column per (compare index, column) pair. The row count must
        equal ``max_back_quarter * len(tickers)`` when the loader is exactly
        a ``GeneratedData``, and must not exceed it for any other loader.
        """
        diff_features = QuarterlyDiffFeatures(
            columns=columns,
            compare_quarter_idxs=compare_quarter_idxs,
            max_back_quarter=max_back_quarter,
        )
        result = diff_features.calculate(data_loader, tickers)

        assert type(result) == pd.DataFrame
        for level in ('ticker', 'date'):
            assert level in result.index.names

        row_count = result.shape[0]
        if type(data_loader) == GeneratedData:
            # Exact loader type match: the row count is asserted exactly.
            assert row_count == max_back_quarter * len(tickers)
        else:
            # Any other loader: only the upper bound is asserted.
            assert row_count <= max_back_quarter * len(tickers)

        expected_width = len(compare_quarter_idxs) * len(columns)
        assert result.shape[1] == expected_width
Exemplo n.º 3
0
    def test_calculate(self, data, tickers):
        """Check that ``FeatureMerger`` preserves the left-hand features.

        Quarterly features are merged once with quarterly-diff features on
        ``['ticker', 'date']`` and once with base company features on
        ``'ticker'``. Each merged frame must keep the row count and index of
        the quarterly features, have a column count equal to the sum of
        both inputs, and carry the quarterly feature values unchanged in
        its leading columns.
        """
        quarterly = QuarterlyFeatures(
            data_key='quarterly',
            columns=['ebit'],
            quarter_counts=[2],
            max_back_quarter=10,
        )
        quarterly_diff = QuarterlyDiffFeatures(
            data_key='quarterly',
            columns=['ebit', 'debt'],
            compare_quarter_idxs=[1, 4],
            max_back_quarter=10,
        )
        base_company = BaseCompanyFeatures(
            data_key='base',
            cat_columns=['sector', 'sicindustry'],
        )

        left = quarterly.calculate(data, tickers)
        diff_part = quarterly_diff.calculate(data, tickers)
        base_part = base_company.calculate(data, tickers)

        by_date = FeatureMerger(quarterly, quarterly_diff,
                                on=['ticker', 'date'])
        merged_by_date = by_date.calculate(data, tickers)

        by_ticker = FeatureMerger(quarterly, base_company, on='ticker')
        merged_by_ticker = by_ticker.calculate(data, tickers)

        merged_frames = (merged_by_date, merged_by_ticker)
        right_parts = (diff_part, base_part)

        # Row counts follow the left-hand (quarterly) features.
        for merged in merged_frames:
            assert merged.shape[0] == left.shape[0]

        # Column counts are the sum of both merged inputs.
        for merged, right in zip(merged_frames, right_parts):
            assert merged.shape[1] == left.shape[1] + right.shape[1]

        # The index is carried over from the left-hand features.
        for merged in merged_frames:
            assert (merged.index == left.index).min()

        def _assert_left_values_kept(merged):
            # The leading columns of the merged frame hold the left values.
            leading = merged.columns[:left.shape[1]]
            for merged_col, left_col in zip(leading, left.columns):
                assert (merged[merged_col] == left[left_col]).min()

        for merged in merged_frames:
            _assert_left_values_kept(merged)
Exemplo n.º 4
0
def _create_feature():
    """Build the combined feature calculator used by this pipeline.

    Three calculators are instantiated from the module-level configuration
    constants and chained through ``FeatureMerger``: quarterly features are
    merged with base company features on ``'ticker'``, and the result is
    then merged with the quarterly-diff features on ``['ticker', 'date']``.

    Returns:
        The outermost ``FeatureMerger`` wrapping all three calculators.
    """
    quarterly = QuarterlyFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        quarter_counts=QUARTER_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
    )
    base_company = BaseCompanyFeatures(
        data_key='base',
        cat_columns=CAT_COLUMNS,
    )
    quarterly_diff = QuarterlyDiffFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        compare_quarter_idxs=COMPARE_QUARTER_IDXS,
        max_back_quarter=MAX_BACK_QUARTER,
    )

    # Base features are joined on the ticker alone; the diff features are
    # joined on the (ticker, date) pair.
    with_base = FeatureMerger(quarterly, base_company, on='ticker')
    return FeatureMerger(with_base, quarterly_diff, on=['ticker', 'date'])
Exemplo n.º 5
0
    
    data_loader = SF1Data(config['sf1_data_path'])
    tickers_df = data_loader.load_base_data(
        currency=CURRENCY,
        scalemarketcap=SCALE_MARKETCAP)
    ticker_list = tickers_df['ticker'].unique().tolist()

    fc1 = QuarterlyFeatures(
        columns=QUARTER_COLUMNS,
        quarter_counts=QUARTER_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER)

    fc2 = BaseCompanyFeatures(cat_columns=CAT_COLUMNS)
        
    fc3 = QuarterlyDiffFeatures(
        columns=QUARTER_COLUMNS,
        compare_quarter_idxs=COMPARE_QUARTER_IDXS,
        max_back_quarter=MAX_BACK_QUARTER)
                            
    feature = FeatureMerger(fc1, fc2, on='ticker')
    feature = FeatureMerger(feature, fc3, on=['ticker', 'date'])

    target = QuarterlyDiffTarget(col='marketcap')

    base_models = [lgbm.sklearn.LGBMRegressor(),
                   ctb.CatBoostRegressor(verbose=False)]
                   
    ensemble = EnsembleModel(base_models=base_models, 
                             bagging_fraction=BAGGING_FRACTION,
                             model_cnt=MODEL_CNT)

    model = GroupedOOFModel(ensemble,