def main(data_path: str = None):
    '''
    Download commodities price history from
    https://blog.quandl.com/api-for-commodity-data

    Note:
        To download this dataset you need to register at quandl
        and paste the token into `~/.ml_investment/secrets.json`

    Parameters
    ----------
    data_path:
        path to the folder in which downloaded data will be stored,
        OR ``None`` (then ``commodities_data_path`` from
        `~/.ml_investment/config.json` is used)
    '''
    if data_path is None:
        data_path = load_config()['commodities_data_path']

    quandl = QuandlDownloader(sleep_time=0.8)
    for commodity_code in tqdm(quandl_commodities_codes):
        # '/' in a code is replaced with '_' so that every code maps to a
        # single ``<code>.json`` file directly inside ``data_path``.
        file_stem = commodity_code.replace('/', '_')
        endpoint = 'datasets/{}'.format(commodity_code)
        target_file = '{}/{}.json'.format(data_path, file_stem)
        quandl.single_download(endpoint, target_file)
def main(data_path: str = None):
    '''
    Download daily price bars for base US stocks and indexes.

    The ``base_us_stocks`` tickers plus the index tickers ``SPY``, ``TLT``
    and ``QQQ`` are downloaded in parallel by a pool of 8 worker processes,
    one ``_single_ticker_download`` call per ticker.

    Parameters
    ----------
    data_path:
        path to the folder in which downloaded data will be stored,
        OR ``None`` (then ``daily_bars_data_path`` from
        `~/.ml_investment/config.json` is used)
    '''
    global _data_path

    if data_path is None:
        config = load_config()
        data_path = config['daily_bars_data_path']

    # Published as a module-level global before the pool is created;
    # presumably ``_single_ticker_download`` reads it inside the workers
    # (NOTE(review): this relies on workers inheriting module state --
    # confirm behaviour on spawn-based platforms).
    _data_path = data_path

    tickers = load_tickers()['base_us_stocks']
    index_tickers = ['SPY', 'TLT', 'QQQ']
    os.makedirs(data_path, exist_ok=True)

    # The context manager guarantees the worker processes are released even
    # if a download raises; previously the pool was never closed or joined.
    # The loop fully drains ``imap`` before the ``with`` block exits, so no
    # pending task is cut off by the pool shutdown.
    with Pool(8) as pool:
        for _ in tqdm(pool.imap(_single_ticker_download,
                                tickers + index_tickers)):
            pass
def main(data_path: str = None):
    '''
    Download quarterly fundamental data from
    https://www.quandl.com/databases/SF1/data

    Note:
        SF1 is paid, so you need to subscribe and paste the quandl token
        into `~/.ml_investment/secrets.json` under ``quandl_api_key``

    Parameters
    ----------
    data_path:
        path to the folder in which downloaded data will be stored,
        OR ``None`` (then ``sf1_data_path`` from
        `~/.ml_investment/config.json` is used)
    '''
    if data_path is None:
        data_path = load_config()['sf1_data_path']

    quandl = QuandlDownloader(sleep_time=0.8)

    # The tickers archive is fetched first: the ticker list used below is
    # read back from ``data_path`` through ``SF1BaseData``.
    quandl.zip_download('datatables/SHARADAR/TICKERS?qopts.export=true',
                        '{}/tickers.zip'.format(data_path))
    ticker_list = SF1BaseData(data_path).load()['ticker'].unique().tolist()

    # (url template, save-directory template, batch size) per datatable,
    # processed in this order.
    per_ticker_jobs = (
        ('datatables/SHARADAR/SF1?ticker={ticker}',
         '{}/core_fundamental', 10),
        ('datatables/SHARADAR/DAILY?ticker={ticker}',
         '{}/daily', 5),
    )
    for url_template, dir_template, batch in per_ticker_jobs:
        quandl.ticker_download(url_template,
                               ticker_list,
                               save_dirpath=dir_template.format(data_path),
                               skip_exists=False,
                               batch_size=batch,
                               n_jobs=4)
def main(data_path: str = None):
    '''
    Download quarterly and base data from https://finance.yahoo.com

    Parameters
    ----------
    data_path:
        path to the folder in which downloaded data will be stored,
        OR ``None`` (then ``yahoo_data_path`` from
        `~/.ml_investment/config.json` is used)
    '''
    target_dir = data_path
    if target_dir is None:
        target_dir = load_config()['yahoo_data_path']

    symbols = load_tickers()['base_us_stocks']
    yahoo = YahooDownloaderV1()

    # Quarterly data is downloaded before base data, for the same tickers
    # and into the same folder.
    yahoo.download_quarterly_data(target_dir, symbols)
    yahoo.download_base_data(target_dir, symbols)
def main(data_path: str = None):
    '''
    Download quarterly and base data from https://finance.yahoo.com

    The ``base_us_stocks`` tickers are downloaded in parallel by a pool of
    12 worker processes, one ``_single_ticker_download`` call per ticker.
    The ``quarterly`` and ``base`` sub-folders of ``data_path`` are created
    up front.

    Parameters
    ----------
    data_path:
        path to the folder in which downloaded data will be stored,
        OR ``None`` (then ``yahoo_data_path`` from
        `~/.ml_investment/config.json` is used)
    '''
    global _data_path

    if data_path is None:
        config = load_config()
        data_path = config['yahoo_data_path']

    # Published as a module-level global before the pool is created;
    # presumably ``_single_ticker_download`` reads it inside the workers
    # (NOTE(review): this relies on workers inheriting module state --
    # confirm behaviour on spawn-based platforms).
    _data_path = data_path

    tickers = load_tickers()['base_us_stocks']
    os.makedirs('{}/quarterly'.format(data_path), exist_ok=True)
    os.makedirs('{}/base'.format(data_path), exist_ok=True)

    # The context manager guarantees the worker processes are released even
    # if a download raises; previously the pool was never closed or joined.
    # The loop fully drains ``imap`` before the ``with`` block exits, so no
    # pending task is cut off by the pool shutdown.
    with Pool(12) as pool:
        for _ in tqdm(pool.imap(_single_ticker_download, tickers)):
            pass
import lightgbm as lgbm import catboost as ctb from urllib.request import urlretrieve from ml_investment.utils import load_config, load_tickers from ml_investment.data_loaders.yahoo import YahooBaseData, YahooQuarterlyData from ml_investment.data_loaders.daily_bars import DailyBarsData from ml_investment.features import QuarterlyFeatures, BaseCompanyFeatures, \ FeatureMerger, DailyAggQuarterFeatures, \ QuarterlyDiffFeatures from ml_investment.targets import DailyAggTarget from ml_investment.models import TimeSeriesOOFModel, EnsembleModel, LogExpModel from ml_investment.metrics import median_absolute_relative_error, down_std_norm from ml_investment.pipelines import Pipeline from ml_investment.download_scripts import download_yahoo, download_daily_bars config = load_config() URL = 'https://github.com/fartuk/ml_investment/releases/download/weights/marketcap_down_std_yahoo.pickle' OUT_NAME = 'marketcap_down_std_yahoo' TARGET_HORIZON = 90 MAX_BACK_QUARTER = 2 FOLD_CNT = 5 QUARTER_COUNTS = [1, 2, 4] COMPARE_QUARTER_IDXS = [1, 4] CAT_COLUMNS = ["sector"] QUARTER_COLUMNS = [ 'totalRevenue', 'netIncome', 'cash', 'totalAssets', 'costOfRevenue',