示例#1
0
def _create_feature():
    """Assemble the combined feature calculator.

    Builds four calculators (quarterly, base company, quarterly diff and
    commodity daily aggregates) and chains them with ``FeatureMerger``.

    Returns:
        The outermost ``FeatureMerger`` wrapping all four calculators.
    """
    quarterly = QuarterlyFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        quarter_counts=QUARTER_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
    )
    company = BaseCompanyFeatures(data_key='base', cat_columns=CAT_COLUMNS)
    quarterly_diff = QuarterlyDiffFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        compare_quarter_idxs=COMPARE_QUARTER_IDXS,
        max_back_quarter=MAX_BACK_QUARTER,
    )
    commodities = DailyAggQuarterFeatures(
        daily_data_key='commodities',
        quarterly_data_key='quarterly',
        columns=['price'],
        agg_day_counts=AGG_DAY_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
        daily_index=COMMODITIES_CODES,
    )

    # The company features are joined on 'ticker' only; every later
    # calculator is joined on the ('ticker', 'date') pair.
    merged = FeatureMerger(quarterly, company, on='ticker')
    for extra in (quarterly_diff, commodities):
        merged = FeatureMerger(merged, extra, on=['ticker', 'date'])
    return merged
def _create_feature():
    """Assemble the combined feature calculator.

    Builds quarterly, base company, daily-aggregate and commodity
    daily-aggregate calculators and chains them with ``FeatureMerger``.

    Returns:
        The outermost ``FeatureMerger`` wrapping all four calculators.
    """
    quarterly = QuarterlyFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        quarter_counts=QUARTER_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
    )
    company = BaseCompanyFeatures(data_key='base', cat_columns=CAT_COLUMNS)

    # Daily aggregates on marketcap and pe are possible here because they
    # are normalized, so there is no leakage.
    daily_aggs = DailyAggQuarterFeatures(
        daily_data_key='daily',
        quarterly_data_key='quarterly',
        columns=DAILY_AGG_COLUMNS,
        agg_day_counts=AGG_DAY_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
    )
    commodities = DailyAggQuarterFeatures(
        daily_data_key='commodities',
        quarterly_data_key='quarterly',
        columns=['price'],
        agg_day_counts=AGG_DAY_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
        daily_index=COMMODITIES_CODES,
    )

    # The company features are joined on 'ticker' only; every later
    # calculator is joined on the ('ticker', 'date') pair.
    merged = FeatureMerger(quarterly, company, on='ticker')
    for extra in (daily_aggs, commodities):
        merged = FeatureMerger(merged, extra, on=['ticker', 'date'])
    return merged
示例#3
0
    def test_calculate(self, data, tickers, cat_columns):
        """Check ``BaseCompanyFeatures.calculate`` output and repeatability.

        Asserts that the result is a DataFrame with ``'ticker'`` among its
        index names, that every categorical column has as many distinct
        values as the base data restricted to ``tickers``, and that a second
        ``calculate`` call on the same instance yields identical categorical
        columns.
        """
        fc = BaseCompanyFeatures(data_key='base', cat_columns=cat_columns)
        X = fc.calculate(data, tickers)

        assert isinstance(X, pd.DataFrame)
        assert 'ticker' in X.index.names

        base_data = data['base'].load(tickers)
        # Vectorised membership filter instead of a per-row Python lambda.
        base_data = base_data[base_data['ticker'].isin(tickers)]
        for col in cat_columns:
            assert len(base_data[col].unique()) == len(X[col].unique())

        # Reuse the already-fitted calculator: a second call must reproduce
        # the same categorical columns.
        new_X = fc.calculate(data, tickers)
        for col in cat_columns:
            assert (new_X[col] == X[col]).all()
示例#4
0
def _create_feature():
    """Assemble the quarterly + base company feature calculator.

    Returns:
        A ``FeatureMerger`` joining the quarterly features (limited to
        ``max_back_quarter=1``) with the base company features on
        ``'ticker'``.
    """
    quarterly = QuarterlyFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        quarter_counts=QUARTER_COUNTS,
        max_back_quarter=1,
    )
    company = BaseCompanyFeatures(data_key='base', cat_columns=CAT_COLUMNS)
    return FeatureMerger(quarterly, company, on='ticker')
示例#5
0
    def test_calculate(self, data, tickers):
        """Check that ``FeatureMerger`` keeps the left calculator's output.

        Two mergers are exercised, both with the quarterly features on the
        left: one joined with quarterly diff features on
        ``['ticker', 'date']`` and one joined with base company features on
        ``'ticker'``. For each merged frame the test asserts that the row
        count and index equal the left frame's, that the column count is the
        sum of both inputs, and that the leading columns equal the left
        frame's columns.
        """
        fc1 = QuarterlyFeatures(data_key='quarterly',
                                columns=['ebit'],
                                quarter_counts=[2],
                                max_back_quarter=10)

        fc2 = QuarterlyDiffFeatures(data_key='quarterly',
                                    columns=['ebit', 'debt'],
                                    compare_quarter_idxs=[1, 4],
                                    max_back_quarter=10)

        fc3 = BaseCompanyFeatures(data_key='base',
                                  cat_columns=['sector', 'sicindustry'])

        X1 = fc1.calculate(data, tickers)
        X2 = fc2.calculate(data, tickers)
        X3 = fc3.calculate(data, tickers)

        fm1 = FeatureMerger(fc1, fc2, on=['ticker', 'date'])
        Xm1 = fm1.calculate(data, tickers)

        fm2 = FeatureMerger(fc1, fc3, on='ticker')
        Xm2 = fm2.calculate(data, tickers)

        left_width = X1.shape[1]
        # Each merged frame is checked against the shared left input (X1)
        # and its own right input.
        for merged, right in ((Xm1, X2), (Xm2, X3)):
            assert merged.shape[0] == X1.shape[0]
            assert merged.shape[1] == left_width + right.shape[1]
            assert (merged.index == X1.index).all()

            # The left frame's columns must come first and be unchanged.
            for nc, oc in zip(merged.columns[:left_width], X1.columns):
                assert (merged[nc] == X1[oc]).all()
示例#6
0
    def test_calculate(self, data_loader, tickers, cat_columns):
        """Check ``BaseCompanyFeatures.calculate`` and reuse of its encoders.

        Asserts that the result is a DataFrame with ``'ticker'`` among its
        index names, that each categorical column's encoder has as many
        classes as the base data has distinct values, and that recalculating
        with the same instance gives identical categorical columns, both
        from ``data_loader`` directly and through a ``WrapData`` wrapper.
        """
        fc = BaseCompanyFeatures(cat_columns=cat_columns)
        X = fc.calculate(data_loader, tickers)

        assert isinstance(X, pd.DataFrame)
        assert 'ticker' in X.index.names

        base_data = data_loader.load_base_data()
        for col in cat_columns:
            n_classes = len(fc.col_to_encoder[col].classes_)
            assert len(base_data[col].unique()) == n_classes

        # Reuse the already-fitted calculator: encoders must be present and
        # a second call must reproduce the same categorical columns.
        for col in cat_columns:
            assert col in fc.col_to_encoder
        new_X = fc.calculate(data_loader, tickers)
        for col in cat_columns:
            assert (new_X[col] == X[col]).all()

        # Same check when the loader is accessed through WrapData.
        wd = WrapData(data_loader, tickers)
        new_X = fc.calculate(wd, tickers)
        for col in cat_columns:
            assert (new_X[col] == X[col]).all()
示例#7
0
def _create_feature():
    """Assemble the quarterly + base company + quarterly diff calculator.

    Returns:
        A ``FeatureMerger`` that joins the quarterly features with the base
        company features on ``'ticker'`` and then with the quarterly diff
        features on ``['ticker', 'date']``.
    """
    quarterly = QuarterlyFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        quarter_counts=QUARTER_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER,
    )
    company = BaseCompanyFeatures(data_key='base', cat_columns=CAT_COLUMNS)
    quarterly_diff = QuarterlyDiffFeatures(
        data_key='quarterly',
        columns=QUARTER_COLUMNS,
        compare_quarter_idxs=COMPARE_QUARTER_IDXS,
        max_back_quarter=MAX_BACK_QUARTER,
    )

    with_company = FeatureMerger(quarterly, company, on='ticker')
    return FeatureMerger(with_company, quarterly_diff, on=['ticker', 'date'])
示例#8
0
    args = parser.parse_args()
    
    config = load_json(args.config_path)
    
    data_loader = SF1Data(config['sf1_data_path'])
    tickers_df = data_loader.load_base_data(
        currency=CURRENCY,
        scalemarketcap=SCALE_MARKETCAP)
    ticker_list = tickers_df['ticker'].unique().tolist()

    fc1 = QuarterlyFeatures(
        columns=QUARTER_COLUMNS,
        quarter_counts=QUARTER_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER)

    fc2 = BaseCompanyFeatures(cat_columns=CAT_COLUMNS)
        
    fc3 = QuarterlyDiffFeatures(
        columns=QUARTER_COLUMNS,
        compare_quarter_idxs=COMPARE_QUARTER_IDXS,
        max_back_quarter=MAX_BACK_QUARTER)
                            
    feature = FeatureMerger(fc1, fc2, on='ticker')
    feature = FeatureMerger(feature, fc3, on=['ticker', 'date'])

    target = QuarterlyDiffTarget(col='marketcap')

    base_models = [lgbm.sklearn.LGBMRegressor(),
                   ctb.CatBoostRegressor(verbose=False)]
                   
    ensemble = EnsembleModel(base_models=base_models,