Exemplo n.º 1
0
    def test_calculate(self, tickers, columns, quarter_counts,
                       max_back_quarter):
        """Check QuarterlyFeatures.calculate output structure and invariants.

        Verifies, for both a synthetic Data loader and the real SF1Data
        loader: the result is a DataFrame indexed by (ticker, date), the
        row/column counts match expectations, and the per-quarter-window
        statistics obey basic ordering invariants.
        """
        fc = QuarterlyFeatures(columns=columns,
                               quarter_counts=quarter_counts,
                               max_back_quarter=max_back_quarter)

        loaders = [Data(columns), SF1Data(config['sf1_data_path'])]
        for data_loader in loaders:
            X = fc.calculate(data_loader, tickers)

            # isinstance is the idiomatic type check (tolerates subclasses).
            assert isinstance(X, pd.DataFrame)
            assert 'ticker' in X.index.names
            assert 'date' in X.index.names

            if isinstance(data_loader, Data):
                # The synthetic loader always yields a full row grid.
                assert X.shape[0] == max_back_quarter * len(tickers)
            else:
                # Real data may be missing quarters for some tickers.
                assert X.shape[0] <= max_back_quarter * len(tickers)

            assert X.shape[1] == 2 * len(calc_series_stats([])) * \
                                 len(columns) * len(quarter_counts)

            # Sort once (was computed twice in the original).
            sorted_quarter_counts = np.sort(quarter_counts)

            # Widening the quarter window can only lower (or keep) the min
            # and raise (or keep) the max.
            for col in columns:
                for k in range(len(sorted_quarter_counts) - 1):
                    lower_count = sorted_quarter_counts[k]
                    higher_count = sorted_quarter_counts[k + 1]

                    l_min = 'quarter{}_{}_min'.format(lower_count, col)
                    h_min = 'quarter{}_{}_min'.format(higher_count, col)
                    assert (X[h_min] <= X[l_min]).all()

                    l_max = 'quarter{}_{}_max'.format(lower_count, col)
                    h_max = 'quarter{}_{}_max'.format(higher_count, col)
                    assert (X[h_max] >= X[l_max]).all()

            # Standard deviations are non-negative by definition.
            std_cols = [x for x in X.columns if '_std' in x]
            for col in std_cols:
                assert X[col].min() >= 0

            # Ordering invariants: min <= mean, median <= max.
            for col in columns:
                for count in quarter_counts:
                    min_col = 'quarter{}_{}_min'.format(count, col)
                    max_col = 'quarter{}_{}_max'.format(count, col)
                    mean_col = 'quarter{}_{}_mean'.format(count, col)
                    median_col = 'quarter{}_{}_median'.format(count, col)
                    assert (X[max_col] >= X[min_col]).all()
                    assert (X[max_col] >= X[mean_col]).all()
                    assert (X[max_col] >= X[median_col]).all()
                    assert (X[mean_col] >= X[min_col]).all()
                    assert (X[median_col] >= X[min_col]).all()
    def _create_base_components(self):
        """Build the (features, target, model) triple shared by the tests.

        Returns a QuarterlyFeatures extractor over six fundamental columns,
        a current-quarter marketcap target, and a grouped out-of-fold LGBM
        regressor partitioned by ticker.
        """
        quarterly_cols = ['revenue', 'netinc', 'ncf', 'ebitda', 'debt', 'fcf']

        features = QuarterlyFeatures(
            columns=quarterly_cols,
            quarter_counts=[2, 10],
            max_back_quarter=1)

        marketcap_target = QuarterlyTarget(col='marketcap', quarter_shift=0)

        oof_model = GroupedOOFModel(
            lgbm.sklearn.LGBMRegressor(),
            group_column='ticker',
            fold_cnt=4)

        return features, marketcap_target, oof_model
Exemplo n.º 3
0
    def test_calculate(self, tickers):
        """Check FeatureMerger joins feature frames without corrupting them.

        Merging two feature sets must preserve the left frame's row count,
        index and values, and concatenate the columns additively — for both
        a (ticker, date) merge and a ticker-only merge.
        """
        data_loader = SF1Data(config['sf1_data_path'])
        fc1 = QuarterlyFeatures(columns=['ebit'],
                                quarter_counts=[2],
                                max_back_quarter=10)

        fc2 = QuarterlyDiffFeatures(columns=['ebit', 'debt'],
                                    compare_quarter_idxs=[1, 4],
                                    max_back_quarter=10)

        fc3 = BaseCompanyFeatures(cat_columns=['sector', 'sicindustry'])

        X1 = fc1.calculate(data_loader, tickers)
        X2 = fc2.calculate(data_loader, tickers)
        X3 = fc3.calculate(data_loader, tickers)

        fm1 = FeatureMerger(fc1, fc2, on=['ticker', 'date'])
        Xm1 = fm1.calculate(data_loader, tickers)

        fm2 = FeatureMerger(fc1, fc3, on='ticker')
        Xm2 = fm2.calculate(data_loader, tickers)

        # Merging must not add or drop rows relative to the left input.
        assert Xm1.shape[0] == X1.shape[0]
        assert Xm2.shape[0] == X1.shape[0]
        # Column counts are additive.
        assert Xm1.shape[1] == X1.shape[1] + X2.shape[1]
        assert Xm2.shape[1] == X1.shape[1] + X3.shape[1]
        # Row order (the index) is preserved.
        assert (Xm1.index == X1.index).all()
        assert (Xm2.index == X1.index).all()

        # The leading columns of each merged frame must reproduce X1's
        # values exactly (was two identical loops in the original).
        for Xm in (Xm1, Xm2):
            new_cols = Xm.columns[:X1.shape[1]]
            for nc, oc in zip(new_cols, X1.columns):
                assert (Xm[nc] == X1[oc]).all()
Exemplo n.º 4
0
    def _create_pipeline(self):
        """Assemble a BasePipeline predicting current-quarter marketcap.

        Combines quarterly fundamental features, a zero-shift marketcap
        target, and a ticker-grouped out-of-fold LGBM model, scored with
        the median absolute relative error metric.
        """
        feature_cols = ['revenue', 'netinc', 'ncf', 'ebitda', 'debt', 'fcf']

        quarterly_features = QuarterlyFeatures(
            columns=feature_cols,
            quarter_counts=[2, 10],
            max_back_quarter=1)

        marketcap_target = QuarterlyTarget(col='marketcap', quarter_shift=0)

        oof_model = GroupedOOFModel(
            lgbm.sklearn.LGBMRegressor(),
            group_column='ticker',
            fold_cnt=4)

        return BasePipeline(quarterly_features,
                            marketcap_target,
                            oof_model,
                            metric=median_absolute_relative_error)
    def test_execute_simple(self, data_loader):
        """Check ExecuteMergePipeline combines pipeline outputs correctly.

        Fits two prediction pipelines (current and next-quarter marketcap)
        plus a raw-load pipeline, merges their outputs on (ticker, date),
        and verifies the merged frame has the expected columns and exactly
        reproduces each standalone pipeline's predictions.
        """
        columns = ['revenue', 'netinc', 'ncf', 'ebitda', 'debt', 'fcf']
        f1 = QuarterlyFeatures(columns=columns,
                               quarter_counts=[2, 10],
                               max_back_quarter=1)

        # Targets: current quarter (shift=0) and next quarter (shift=-1).
        target1 = QuarterlyTarget(col='marketcap', quarter_shift=0)
        target2 = QuarterlyTarget(col='marketcap', quarter_shift=-1)

        model = lgbm.sklearn.LGBMRegressor()

        pipeline1 = BasePipeline(feature=f1,
                                 target=target1,
                                 model=model,
                                 metric=median_absolute_relative_error,
                                 out_name='p1')

        pipeline2 = BasePipeline(feature=f1,
                                 target=target2,
                                 model=model,
                                 metric=median_absolute_relative_error,
                                 out_name='p2')

        pipeline3 = QuarterlyLoadPipeline(['ticker', 'date', 'marketcap'])

        # NOTE(review): `tickers` is neither a parameter nor a local here —
        # presumably a module-level name/fixture; confirm it is defined.
        pipeline1.fit(data_loader, tickers)
        pipeline2.fit(data_loader, tickers)

        merge1 = ExecuteMergePipeline(
            pipeline_list=[pipeline1, pipeline2, pipeline3],
            on=['ticker', 'date'])

        df1 = pipeline1.execute(data_loader, tickers)
        df2 = pipeline2.execute(data_loader, tickers)
        df3 = pipeline3.execute(data_loader, tickers)

        df = merge1.execute(data_loader, tickers)
        # isinstance is the idiomatic type check (tolerates subclasses).
        assert isinstance(df, pd.DataFrame)
        assert len(df) == len(df1)
        np.testing.assert_array_equal(
            df.columns, ['ticker', 'date', 'p1', 'p2', 'marketcap'])

        # Merged predictions must match the standalone pipeline outputs.
        np.testing.assert_array_equal(df1['p1'], df['p1'])
        np.testing.assert_array_equal(df2['p2'], df['p2'])
Exemplo n.º 6
0
    # Parse the path to a JSON config describing the pipelines to build.
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--config_path', type=str)
    args = parser.parse_args()

    config = load_json(args.config_path)
    # Settings for the 'marketcap_down_std' pipeline specifically.
    pipeline_config = config['pipelines']['marketcap_down_std']

    # Universe of tickers filtered by currency and marketcap scale.
    data_loader = SF1Data(config['sf1_data_path'])
    tickers_df = data_loader.load_base_data(
        currency=pipeline_config['currency'],
        scalemarketcap=pipeline_config['scalemarketcap'])
    ticker_list = tickers_df['ticker'].unique().tolist()

    # Four feature extractors sharing the same max_back_quarter horizon.
    fc1 = QuarterlyFeatures(
        columns=pipeline_config['quarter_columns'],
        quarter_counts=pipeline_config['quarter_counts'],
        max_back_quarter=pipeline_config['max_back_quarter'])

    fc2 = BaseCompanyFeatures(cat_columns=pipeline_config['cat_columns'])

    fc3 = QuarterlyDiffFeatures(
        columns=pipeline_config['quarter_columns'],
        compare_quarter_idxs=pipeline_config['compare_quarter_idxs'],
        max_back_quarter=pipeline_config['max_back_quarter'])

    fc4 = DailyAggQuarterFeatures(
        columns=pipeline_config['daily_agg_columns'],
        agg_day_counts=pipeline_config['agg_day_counts'],
        max_back_quarter=pipeline_config['max_back_quarter'])

    # Merge company-level features onto quarterly rows by ticker.
    # (fc3/fc4 are presumably merged further below — not visible here.)
    feature = FeatureMerger(fc1, fc2, on='ticker')
# Categorical company attributes used by BaseCompanyFeatures below.
CAT_COLUMNS = ["sector", "sicindustry"]
# Quarterly fundamental columns fed to QuarterlyFeatures below.
QUARTER_COLUMNS = [
    "revenue", "netinc", "ncf", "assets", "ebitda", "debt", "fcf", "gp",
    "workingcapital", "cashneq", "rnd", "sgna", "ncfx", "divyield",
    "currentratio", "netinccmn"
]

if __name__ == '__main__':
    # Load data paths from the local config; CURRENCY, SCALE_MARKETCAP,
    # QUARTER_COUNTS, MAX_BACK_QUARTER, DAILY_AGG_COLUMNS and
    # AGG_DAY_COUNTS are presumably module-level constants defined
    # outside this view — verify before reuse.
    config = load_json('config.json')
    data_loader = SF1Data(config['sf1_data_path'])
    tickers_df = data_loader.load_base_data(currency=CURRENCY,
                                            scalemarketcap=SCALE_MARKETCAP)
    ticker_list = tickers_df['ticker'].unique().tolist()

    # Statistical features over trailing quarter windows.
    fc1 = QuarterlyFeatures(columns=QUARTER_COLUMNS,
                            quarter_counts=QUARTER_COUNTS,
                            max_back_quarter=MAX_BACK_QUARTER)

    # Encoded categorical company attributes.
    fc2 = BaseCompanyFeatures(cat_columns=CAT_COLUMNS)

    # Daily agss on marketcap and pe is possible here because it
    # normalized and there are no leakage.
    fc3 = DailyAggQuarterFeatures(columns=DAILY_AGG_COLUMNS,
                                  agg_day_counts=AGG_DAY_COUNTS,
                                  max_back_quarter=MAX_BACK_QUARTER)

    # Chain merges: company features by ticker, then daily aggregates
    # by (ticker, date).
    feature = FeatureMerger(fc1, fc2, on='ticker')
    feature = FeatureMerger(feature, fc3, on=['ticker', 'date'])

    # Current-quarter marketcap as the prediction target.
    target = QuarterlyTarget(col='marketcap', quarter_shift=0)