Пример #1
0
 def test_translate_currency_real(self, ticker):
     """Check currency translation against the reported USD columns.

     Loads quarterly data for ``ticker`` from the SF1 loader, passes it
     through ``SF1Data.translate_currency`` for a fixed list of columns
     and asserts that, for each column, the largest relative difference
     between ``<col>usd`` and the translated ``<col>`` is below 0.1.
     NaN ratios are excluded before taking the maximum.
     """
     loader = SF1Data(config['sf1_data_path'])
     source_df = loader.load_quarterly_data(ticker, 10)

     columns = [
         'equity', 'eps', 'revenue', 'netinccmn', 'cashneq', 'debt', 'ebit',
         'ebitda'
     ]
     translated = SF1Data.translate_currency(source_df, columns)

     for col in columns:
         usd_series = translated['{}usd'.format(col)]
         # Relative error, measured against the USD reference column.
         rel_err = np.abs(
             (usd_series - translated[col]).values / usd_series.values)
         rel_err = rel_err[~np.isnan(rel_err)]
         assert rel_err.max() < 0.1
Пример #2
0
    def test_calculate(self, tickers):
        """Check ``QuarterlyTarget`` on the ``marketcap`` column.

        Covers three situations:

        * ``quarter_shift=0`` queried with the first row of every ticker:
          the result is a DataFrame with a ``y`` column equal to the
          ``marketcap`` of those rows.
        * ``quarter_shift=0`` queried with every quarterly row: ``y``
          equals ``marketcap`` row by row.
        * ``quarter_shift`` of ``1`` and ``-3``: ``y`` equals ``marketcap``
          shifted by that amount inside each ticker group.
        """
        data_loader = SF1Data(config['sf1_data_path'])
        quarterly_df = data_loader.load_quarterly_data(tickers,
                                                       quarter_count=None)
        info_columns = ['ticker', 'date', 'marketcap']

        target = QuarterlyTarget('marketcap', quarter_shift=0)

        # One row per ticker (the first one in loader order).
        first_rows = quarterly_df.drop_duplicates('ticker', keep='first')
        info_df = first_rows[info_columns]
        y = target.calculate(data_loader, info_df[['ticker', 'date']])
        assert isinstance(y, pd.DataFrame)
        assert 'y' in y.columns
        np.testing.assert_array_equal(y['y'].values,
                                      info_df['marketcap'].values)

        # Every quarterly row, still without a shift.
        info_df = quarterly_df[info_columns]
        y = target.calculate(data_loader, info_df)
        np.testing.assert_array_equal(y['y'].values,
                                      info_df['marketcap'].values)

        # Shifted targets: compare with a per-ticker shift of marketcap.
        for quarter_shift in (1, -3):
            target = QuarterlyTarget('marketcap', quarter_shift=quarter_shift)
            # A fresh selection per case, as calculate() receives it as input.
            info_df = quarterly_df[info_columns]
            y = target.calculate(data_loader, info_df)
            expected = (info_df.groupby('ticker')['marketcap']
                        .shift(quarter_shift)
                        .astype('float'))
            np.testing.assert_array_equal(y['y'].values, expected.values)
Пример #3
0
 def test_load_base_data(self):
     """Check the basic shape of ``SF1Data.load_base_data`` output.

     The result must be a non-empty DataFrame that has a ``ticker``
     column without missing values.
     """
     data_loader = SF1Data(config['sf1_data_path'])
     df = data_loader.load_base_data()
     assert isinstance(df, pd.DataFrame)
     assert len(df) > 0
     assert 'ticker' in df.columns
     assert not df['ticker'].isnull().any()
Пример #4
0
    def test_calculate(self, tickers):
        """Check ``QuarterlyDiffTarget`` on ``marketcap`` with ``norm=False``.

        The result must be a DataFrame with a ``y`` column and one row per
        input row. ``y`` is compared with ``marketcap`` minus the
        ``marketcap`` of the following row inside the same ticker group
        (``shift(-1)``).
        """
        data_loader = SF1Data(config['sf1_data_path'])
        quarterly_df = data_loader.load_quarterly_data(tickers,
                                                       quarter_count=None)

        target = QuarterlyDiffTarget('marketcap', norm=False)
        info_df = quarterly_df[['ticker', 'date', 'marketcap']]
        y = target.calculate(data_loader, info_df)
        assert isinstance(y, pd.DataFrame)
        assert 'y' in y.columns
        assert len(y) == len(info_df)

        current = info_df['marketcap'].astype('float')
        following = (info_df.groupby('ticker')['marketcap']
                     .shift(-1)
                     .astype('float'))
        gt = current - following
        np.testing.assert_array_equal(y['y'].values, gt.values)
Пример #5
0
    def test_translate_currency_synthetic(self, cnt):
        """Check currency translation on a seeded synthetic frame.

        Builds ``cnt`` rows where row ``i`` uses the multiplier ``i + 1``:
        ``debt`` is ``debtusd`` times the multiplier exactly, ``ebit`` is
        ``ebitusd`` times the multiplier plus uniform noise in
        ``[-0.1, 0.1]``. A random share (at most 0.3) of ``ebitusd``
        values is then blanked out. After ``SF1Data.translate_currency``
        the largest non-NaN relative difference between ``<col>usd`` and
        ``<col>`` must be below 0.1 for both columns.
        """
        # Fixed seed; the order of the random draws below is significant.
        np.random.seed(0)
        rates = np.array(range(1, cnt + 1))

        frame = pd.DataFrame()
        frame['debtusd'] = np.random.uniform(-1e5, 1e5, cnt)
        frame['debt'] = frame['debtusd'] * rates
        frame['ebitusd'] = np.random.uniform(-10, 10, cnt)
        rate_noise = np.random.uniform(-0.1, 0.1, cnt)
        frame['ebit'] = frame['ebitusd'] * (rates + rate_noise)

        # Remove part of the USD reference values.
        drop_share = np.random.uniform(0, 0.3)
        to_drop = np.random.choice([True, False],
                                   cnt,
                                   p=[drop_share, 1 - drop_share])
        frame.loc[to_drop, 'ebitusd'] = None

        translated = SF1Data.translate_currency(frame, ['debt', 'ebit'])
        for name in ('debt', 'ebit'):
            usd_series = translated['{}usd'.format(name)]
            rel_err = np.abs(
                (usd_series - translated[name]).values / usd_series.values)
            rel_err = rel_err[~np.isnan(rel_err)]
            assert rel_err.max() < 0.1
Пример #6
0
    def test_load_quarterly_data(self, tickers, quarter_count, dimension):
        """Check structure and ordering of ``load_quarterly_data`` output.

        Verifies that the result is a DataFrame with ``ticker`` and
        ``date`` columns, that rows inside each ticker come in descending
        date order, that no ticker has more than ``quarter_count`` rows
        (skipped when ``quarter_count`` is ``None``) and that every row
        carries the requested ``dimension``.
        """
        data_loader = SF1Data(config['sf1_data_path'])
        quarterly_df = data_loader.load_quarterly_data(tickers, quarter_count,
                                                       dimension)

        assert isinstance(quarterly_df, pd.DataFrame)
        assert 'ticker' in quarterly_df.columns
        assert 'date' in quarterly_df.columns

        # Data should be ordered by date inside ticker.
        # Helper columns go on a copy so the loaded frame is not modified.
        # pd.to_datetime is used because casting to the unit-less
        # np.datetime64 dtype is rejected by recent pandas releases.
        ordered_df = quarterly_df.assign(
            date_=pd.to_datetime(quarterly_df['date']),
            def_order=np.arange(len(quarterly_df))[::-1])
        expected_dates_order = ordered_df.sort_values(
            ['ticker', 'date_'], ascending=False)['date'].values
        real_dates_order = ordered_df.sort_values(
            ['ticker', 'def_order'], ascending=False)['date'].values
        np.testing.assert_array_equal(expected_dates_order, real_dates_order)

        # Comparing an int with None raises TypeError, so the size limit
        # is only checked when a limit was actually requested.
        if quarter_count is not None:
            for cnt in quarterly_df.groupby('ticker').size():
                assert cnt <= quarter_count

        assert (quarterly_df['dimension'] == dimension).all()
Пример #7
0
    def test_load_daily_data(self, tickers, back_days):
        """Check structure, ordering and density of ``load_daily_data`` output.

        Verifies that the result is a DataFrame with ``ticker`` and
        ``date`` columns, that rows inside each ticker come in descending
        date order, that consecutive rows of a ticker are at most 14 days
        apart and, when ``back_days`` is given, that every ticker has
        exactly ``back_days`` rows.
        """
        data_loader = SF1Data(config['sf1_data_path'])
        daily_df = data_loader.load_daily_data(tickers, back_days=back_days)
        assert isinstance(daily_df, pd.DataFrame)
        assert 'ticker' in daily_df.columns
        assert 'date' in daily_df.columns

        # Data should be ordered by date inside ticker.
        # Helper columns go on a copy so the loaded frame is not modified.
        # pd.to_datetime is used because casting to the unit-less
        # np.datetime64 dtype is rejected by recent pandas releases.
        ordered_df = daily_df.assign(
            date_=pd.to_datetime(daily_df['date']),
            def_order=np.arange(len(daily_df))[::-1])
        expected_dates_order = ordered_df.sort_values(
            ['ticker', 'date_'], ascending=False)['date'].values
        real_dates_order = ordered_df.sort_values(
            ['ticker', 'def_order'], ascending=False)['date'].values
        np.testing.assert_array_equal(expected_dates_order, real_dates_order)

        # Should not be large holes in date: previous row minus current row
        # inside each ticker (the first row of a group yields NaT and is
        # dropped).
        diffs = ordered_df.groupby('ticker')['date_'].shift(
            1) - ordered_df['date_']
        assert (diffs.dropna() <= np.timedelta64(14, 'D')).all()

        if back_days is not None:
            for cnt in daily_df.groupby('ticker').size():
                assert cnt == back_days
Пример #8
0
            "sgna",
            "ncfx",
            "divyield",
            "currentratio",
            "netinccmn"
         ]

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--config_path', type=str)
    args = parser.parse_args()
    
    config = load_json(args.config_path)
    
    data_loader = SF1Data(config['sf1_data_path'])
    tickers_df = data_loader.load_base_data(
        currency=CURRENCY,
        scalemarketcap=SCALE_MARKETCAP)
    ticker_list = tickers_df['ticker'].unique().tolist()

    fc1 = QuarterlyFeatures(
        columns=QUARTER_COLUMNS,
        quarter_counts=QUARTER_COUNTS,
        max_back_quarter=MAX_BACK_QUARTER)

    fc2 = BaseCompanyFeatures(cat_columns=CAT_COLUMNS)
        
    fc3 = QuarterlyDiffFeatures(
        columns=QUARTER_COLUMNS,
        compare_quarter_idxs=COMPARE_QUARTER_IDXS,
Пример #9
0
import pytest
import pandas as pd
import numpy as np
from ml_investment.data import SF1Data
from ml_investment.features import calc_series_stats, QuarterlyFeatures, BaseCompanyFeatures,\
                     QuarterlyDiffFeatures, FeatureMerger, \
                     DailyAggQuarterFeatures
from ml_investment.utils import load_json, int_hash_of_str
from synthetic_data import GeneratedData
# Test configuration, loaded at import time from the relative path below.
config = load_json('config.json')

# The synthetic loader is always present; the SF1 loader is added only
# when the configuration provides a data path for it.
loaders = [GeneratedData()]
if config['sf1_data_path'] is not None:
    loaders += [SF1Data(config['sf1_data_path'])]
    

@pytest.mark.parametrize(
    ["series", "norm", "expected"],
    [([10, 0, 1], False,
      {'_mean': 3.6666666666666665,
       '_median': 1.0,
       '_max': 10.0,
       '_min': 0.0,
       '_std': 4.4969125210773475}),
     ([10, -30, 1, 4, 15.2],  False,
      {'_mean': 0.039999999999999855,
       '_median': 4.0,
       '_max': 15.2,
       '_min': -30.0,
       '_std': 15.798936673080249}), 
     ([1],  False,