Пример #1
0
def _create_data():
    '''
    Build the data loaders used by the pipeline, keyed by dataset name.

    Every loader is constructed from ``config['sf1_data_path']``.

    Returns
    -------
    dict
        Loaders stored under the keys ``'quarterly'``, ``'base'``
        and ``'daily'``.
    '''
    sf1_path = config['sf1_data_path']
    return {
        'quarterly': SF1QuarterlyData(sf1_path),
        'base': SF1BaseData(sf1_path),
        'daily': SF1DailyData(sf1_path),
    }
Пример #2
0
def main(data_path: str = None):
    '''
    Download quarterly fundamental data from
    https://www.quandl.com/databases/SF1/data

    The tickers table is fetched first as a zip archive; the ticker list
    read back from it then drives the per-ticker downloads of the
    ``SHARADAR/SF1`` and ``SHARADAR/DAILY`` tables.

    Note:
        SF1 is paid, so you need to subscribe
        and paste quandl token to `~/.ml_investment/secrets.json`
        ``quandl_api_key``

    Parameters
    ----------
    data_path:
        path to folder in which downloaded data will be stored,
        OR ``None`` (the download path is then taken from
        ``sf1_data_path`` in `~/.ml_investment/config.json`).
    '''
    if data_path is None:
        data_path = load_config()['sf1_data_path']

    downloader = QuandlDownloader(sleep_time=0.8)

    tickers_url = 'datatables/SHARADAR/TICKERS?qopts.export=true'
    tickers_zip = '{}/tickers.zip'.format(data_path)
    downloader.zip_download(tickers_url, tickers_zip)

    # The ticker universe comes from the table that was just downloaded.
    tickers = SF1BaseData(data_path).load()['ticker'].unique().tolist()

    # (url template, target folder template, batch size) per dataset;
    # ``{ticker}`` is left unformatted for the downloader to consume.
    per_ticker_jobs = (
        ('datatables/SHARADAR/SF1?ticker={ticker}',
         '{}/core_fundamental', 10),
        ('datatables/SHARADAR/DAILY?ticker={ticker}',
         '{}/daily', 5),
    )
    for url_template, dir_template, batch in per_ticker_jobs:
        downloader.ticker_download(url_template, tickers,
                                   save_dirpath=dir_template.format(data_path),
                                   skip_exists=False, batch_size=batch,
                                   n_jobs=4)
Пример #3
0
 def test_load(self, tickers):
     '''
     Loading with an explicit ticker list must return a non-empty
     DataFrame whose ``ticker`` column contains only requested tickers.
     '''
     data_loader = SF1BaseData(config['sf1_data_path'])
     df = data_loader.load(tickers)
     # isinstance also accepts DataFrame subclasses, unlike an exact
     # type comparison.
     assert isinstance(df, pd.DataFrame)
     # A DataFrame has no unambiguous truth value, so check its length.
     assert len(df) > 0
     assert 'ticker' in df.columns
     # No loaded ticker may fall outside the requested set.
     assert set(df['ticker'].values) <= set(tickers)
Пример #4
0
 def test_load_no_param(self):
     '''
     Loading without arguments must return a non-empty DataFrame with a
     ``ticker`` column that has no missing values.
     '''
     data_loader = SF1BaseData(config['sf1_data_path'])
     df = data_loader.load()
     # isinstance also accepts DataFrame subclasses, unlike an exact
     # type comparison.
     assert isinstance(df, pd.DataFrame)
     # A DataFrame has no unambiguous truth value, so check its length.
     assert len(df) > 0
     assert 'ticker' in df.columns
     # Direct "no nulls" check instead of comparing a max() to False.
     assert not df['ticker'].isnull().any()
Пример #5
0
def _create_data():
    '''
    Build the data loaders used by the pipeline, keyed by dataset name.

    The SF1 loaders are constructed from ``config['sf1_data_path']`` and
    the commodities loader from ``config['commodities_data_path']``.

    Returns
    -------
    dict
        Loaders stored under the keys ``'quarterly'``, ``'base'``,
        ``'daily'`` and ``'commodities'``.
    '''
    sf1_path = config['sf1_data_path']
    return {
        'quarterly': SF1QuarterlyData(sf1_path),
        'base': SF1BaseData(sf1_path),
        'daily': SF1DailyData(sf1_path),
        'commodities': QuandlCommoditiesData(config['commodities_data_path']),
    }
Пример #6
0
def main():
    '''
    Train the default model and export it.

    Tickers are selected from the SF1 base table by currency
    (``CURRENCY``) and market-cap scale (``SCALE_MARKETCAP``); the
    pipeline is fitted on the first 200 of them and the fit result is
    printed. The output directory for model weights can be changed in
    `~/.ml_investment/config.json` ``models_path``.
    '''
    pipeline = MarketcapDownStdSF1(pretrained=False)
    base_df = SF1BaseData(config['sf1_data_path']).load()

    # Row filters, combined below into a single boolean mask.
    currency_matches = base_df['currency'] == CURRENCY
    scale_matches = base_df['scalemarketcap'].apply(
        lambda scale: scale in SCALE_MARKETCAP)
    tickers = base_df[currency_matches & scale_matches]['ticker'].values

    result = pipeline.fit(tickers[:200], median_absolute_relative_error)
    print(result)

    pipeline.export_core('{}/{}'.format(config['models_path'], OUT_NAME))
Пример #7
0
                                           SF1DailyData
from synthetic_data import GenQuarterlyData, GenBaseData, GenDailyData

config = load_config()

# Synthetic generators: always part of the data sets under test.
gen_data = dict(
    quarterly=GenQuarterlyData(),
    base=GenBaseData(),
    daily=GenDailyData(),
)

# The SF1 loaders are added only when the configured data folder exists.
datas = [gen_data]
if os.path.exists(config['sf1_data_path']):
    sf1_data = dict(
        quarterly=SF1QuarterlyData(config['sf1_data_path']),
        base=SF1BaseData(config['sf1_data_path']),
        daily=SF1DailyData(config['sf1_data_path']),
    )
    datas.append(sf1_data)

# Tickers shared by the tests in this module.
tickers = ['AAPL', 'TSLA', 'K', 'MAC', 'NVDA']


class TestBasePipeline:
    def _create_base_components(self):
        columns = ['revenue', 'netinc', 'ncf', 'ebitda', 'debt', 'fcf']
        f1 = QuarterlyFeatures(data_key='quarterly',
                               columns=columns,
                               quarter_counts=[2, 10],
                               max_back_quarter=1)