def chunk_to_df(self, exchange_name, symbol, data_frequency, period):
    exchange = get_exchange(exchange_name)
    asset = exchange.get_asset(symbol)

    filename = get_bcolz_chunk(
        exchange_name=exchange_name,
        symbol=symbol,
        data_frequency=data_frequency,
        period=period
    )
    reader = BcolzExchangeBarReader(rootdir=filename,
                                    data_frequency=data_frequency)
    # metadata = BcolzMinuteBarMetadata.read(filename)

    start = reader.first_trading_day
    end = reader.last_available_dt

    if data_frequency == 'daily':
        end = end - pd.Timedelta(hours=23, minutes=59)

    print(start, end, data_frequency)
    arrays = reader.load_raw_arrays(self.columns, start, end, [asset.sid, ])

    bundle = ExchangeBundle(exchange_name)
    periods = bundle.get_calendar_periods_range(
        start, end, data_frequency
    )
    return get_df_from_arrays(arrays, periods)
def test_ingest_minute(self):
    data_frequency = 'minute'
    exchange_name = 'poloniex'

    exchange = get_exchange(exchange_name)
    exchange_bundle = ExchangeBundle(exchange)
    assets = [exchange.get_asset('eth_btc')]

    start = pd.to_datetime('2016-03-01', utc=True)
    end = pd.to_datetime('2017-11-1', utc=True)

    log.info('ingesting exchange bundle {}'.format(exchange_name))
    exchange_bundle.ingest(
        data_frequency=data_frequency,
        include_symbols=','.join([asset.symbol for asset in assets]),
        # include_symbols=None,
        exclude_symbols=None,
        start=start,
        end=end,
        show_progress=True)

    reader = exchange_bundle.get_reader(data_frequency)
    for asset in assets:
        arrays = reader.load_raw_arrays(sids=[asset.sid],
                                        fields=['close'],
                                        start_dt=start,
                                        end_dt=end)
        print('found {} rows for {} ingestion\n{}'.format(
            len(arrays[0]), asset.symbol, arrays[0]))
    pass
def download_from_exchange(self, asset, data_frequency, period):
    if data_frequency != 'minute':
        raise Exception(
            "data frequency '{}' is not supported yet for exchange data "
            "download".format(data_frequency))

    if self.exchange is None:
        # Avoid circular dependencies
        from catalyst.exchange.utils.factory import get_exchange
        self.exchange = get_exchange(self.exchange_name)

    pd_period = pd.Period(period)
    start_dt = pd_period.start_time.tz_localize('UTC')
    now = pd.Timestamp.now('UTC')
    total_minutes = round(
        (pd_period.end_time - pd_period.start_time).total_seconds() / 60)

    candles = []
    minutes_to_fetch = total_minutes
    fetched_minutes = 0
    while minutes_to_fetch > 0:
        request_start_date = start_dt + timedelta(minutes=fetched_minutes)
        if request_start_date > now:
            break
        if fetched_minutes > 0:
            time.sleep(DOWNLOAD_REQUEST_DELAY)
        request_size = 1000 if minutes_to_fetch > 1000 else minutes_to_fetch
        results = self.exchange.get_candles(freq='1T',
                                            assets=asset,
                                            start_dt=request_start_date,
                                            bar_count=request_size)
        if len(results) != 0:
            minutes_diff = int(
                (results[-1]['last_traded'] -
                 request_start_date).total_seconds() / 60) + 1
            candles.extend(results)
        else:
            # If we don't have any data, jump to the next request
            # until we find the first minute with data.
            minutes_diff = 1000
        minutes_to_fetch -= minutes_diff
        fetched_minutes += minutes_diff

    if len(candles) == 0:
        log.warn("[{}] No candles found in period: {}",
                 asset.symbol, period)
        return pd.DataFrame()

    df = get_asset_candles_df(
        candles=candles,
        fields=['open', 'high', 'low', 'close', 'volume'])
    return df
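# A small runnable sketch of the period-to-minutes arithmetic used by
# download_from_exchange above. The period string is an illustrative
# assumption; only pandas is required.
import pandas as pd

pd_period = pd.Period('2017-01')
start_dt = pd_period.start_time.tz_localize('UTC')
total_minutes = round(
    (pd_period.end_time - pd_period.start_time).total_seconds() / 60)
# January 2017 spans 31 days -> 31 * 24 * 60 = 44640 minutes,
# fetched in chunks of at most 1000 bars per request.
print(start_dt, total_minutes)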
def ingest(self, data_frequency, include_symbols=None, exclude_symbols=None,
           start=None, end=None, csv=None, show_progress=True,
           show_breakdown=True, show_report=True, from_exchange=False,
           exclude_current_month=False):
    """
    Ingest data based on the specified parameters.

    Parameters
    ----------
    data_frequency: str
    include_symbols: str
    exclude_symbols: str
    start: pd.Timestamp
    end: pd.Timestamp
    show_progress: bool
    """
    if from_exchange:
        log.warning("Ingesting data directly from the exchange: '{}'",
                    self.exchange_name)

    if csv is not None:
        self.ingest_csv(csv, data_frequency)
    else:
        if self.exchange is None:
            # Avoid circular dependencies
            from catalyst.exchange.utils.factory import get_exchange
            self.exchange = get_exchange(self.exchange_name)

        assets = get_assets(self.exchange, include_symbols, exclude_symbols)
        self.update_symbols_file(get_assets(self.exchange, None, None))

        for frequency in data_frequency.split(','):
            self.ingest_assets(assets=assets,
                               data_frequency=frequency,
                               start_dt=start,
                               end_dt=end,
                               show_progress=show_progress,
                               show_breakdown=show_breakdown,
                               show_report=show_report,
                               from_exchange=from_exchange,
                               exclude_current_month=exclude_current_month)
def test_validate_bundles(self):
    # exchange_population = 3
    asset_population = 3
    data_frequency = random.choice(['minute'])

    # bundle = 'dailyBundle' if data_frequency == 'daily' \
    #     else 'minuteBundle'
    # exchanges = select_random_exchanges(
    #     population=exchange_population,
    #     features=[bundle],
    # )  # Type: list[Exchange]
    exchanges = [get_exchange('poloniex', skip_init=True)]
    data_portal = TestSuiteBundle.get_data_portal(exchanges)

    for exchange in exchanges:
        exchange.init()
        frequencies = exchange.get_candle_frequencies(data_frequency)
        freq = random.sample(frequencies, 1)[0]

        rnd = random.SystemRandom()
        # field = rnd.choice(['open', 'high', 'low', 'close', 'volume'])
        field = rnd.choice(['volume'])
        bar_count = random.randint(3, 6)

        assets = select_random_assets(
            exchange.assets, asset_population
        )
        end_dt = None
        for asset in assets:
            attribute = 'end_{}'.format(data_frequency)
            asset_end_dt = getattr(asset, attribute)
            if end_dt is None or asset_end_dt < end_dt:
                end_dt = asset_end_dt

        end_dt = end_dt + timedelta(minutes=3)
        dt_range = pd.date_range(
            end=end_dt, periods=bar_count, freq=freq
        )
        self.compare_bundle_with_exchange(
            exchange=exchange,
            assets=assets,
            end_dt=dt_range[-1],
            bar_count=bar_count,
            freq=freq,
            data_frequency=data_frequency,
            data_portal=data_portal,
            field=field,
        )
    pass
def test_merge_ctables(self):
    exchange_name = 'bittrex'

    # Switch between daily and minute for testing
    # data_frequency = 'daily'
    data_frequency = 'daily'

    exchange = get_exchange(exchange_name)
    assets = [
        exchange.get_asset('eth_btc'),
        exchange.get_asset('etc_btc'),
        exchange.get_asset('wings_eth'),
    ]

    start = pd.to_datetime('2017-9-1', utc=True)
    end = pd.to_datetime('2017-9-30', utc=True)

    exchange_bundle = ExchangeBundle(exchange)
    writer = exchange_bundle.get_writer(start, end, data_frequency)

    # In the interest of avoiding abstractions, this writes a chunk
    # to the ctable. It does not include the logic which creates chunks.
    for asset in assets:
        exchange_bundle.ingest_ctable(
            asset=asset,
            data_frequency=data_frequency,
            # period='2017-9',
            period='2017',
            # Don't forget to update the period if you change your dates
            start_dt=start,
            end_dt=end,
            writer=writer,
            empty_rows_behavior='strip')

    # In daily mode, this returns an error. It appears that writing
    # a second asset in the same date range removed the first asset.
    # In minute mode, the data is there too. This signals that the minute
    # writer / reader is more robust, which explains why I did not
    # encounter these problems while focusing on minute data.
    reader = exchange_bundle.get_reader(data_frequency)
    for asset in assets:
        # Since this pair was loaded last, it should be here in daily mode.
        arrays = reader.load_raw_arrays(sids=[asset.sid],
                                        fields=['close'],
                                        start_dt=start,
                                        end_dt=end)
        print('found {} rows for {} ingestion\n{}'.format(
            len(arrays[0]), asset.symbol, arrays[0]))
    pass
def test_ingest_candles(self):
    exchange_name = 'bitfinex'
    data_frequency = 'minute'

    exchange = get_exchange(exchange_name)
    bundle = ExchangeBundle(exchange)
    assets = [exchange.get_asset('iot_btc')]

    end_dt = pd.to_datetime('2017-10-20', utc=True)
    bar_count = 100
    start_dt = get_start_dt(end_dt, bar_count, data_frequency)

    candles = exchange.get_candles(assets=assets,
                                   start_dt=start_dt,
                                   end_dt=end_dt,
                                   bar_count=bar_count,
                                   freq='1T')

    writer = bundle.get_writer(start_dt, end_dt, data_frequency)
    for asset in assets:
        dates = [candle['last_traded'] for candle in candles[asset]]

        values = dict()
        for field in ['open', 'high', 'low', 'close', 'volume']:
            values[field] = [candle[field] for candle in candles[asset]]

        periods = bundle.get_calendar_periods_range(
            start_dt, end_dt, data_frequency)

        df = pd.DataFrame(values, index=dates)
        df = df.loc[periods].fillna(method='ffill')

        # TODO: why do I get an extra bar?
        bundle.ingest_df(ohlcv_df=df,
                         data_frequency=data_frequency,
                         asset=asset,
                         writer=writer,
                         empty_rows_behavior='raise',
                         duplicates_behavior='raise')

    bundle_series = bundle.get_history_window_series(
        assets=assets,
        end_dt=end_dt,
        bar_count=bar_count,
        field='close',
        data_frequency=data_frequency,
        reset_reader=True)
    df = pd.DataFrame(bundle_series)
    print('\n' + df_to_string(df))
    pass
def test_validate_data(self):
    exchange_name = 'bitfinex'
    data_frequency = 'minute'

    exchange = get_exchange(exchange_name)
    exchange_bundle = ExchangeBundle(exchange)
    assets = [exchange.get_asset('iot_btc')]

    end_dt = pd.to_datetime('2017-9-2 1:00', utc=True)
    bar_count = 60

    bundle_series = exchange_bundle.get_history_window_series(
        assets=assets,
        end_dt=end_dt,
        bar_count=bar_count * 5,
        field='close',
        data_frequency='minute',
    )
    candles = exchange.get_candles(assets=assets,
                                   end_dt=end_dt,
                                   bar_count=bar_count,
                                   freq='1T')
    start_dt = get_start_dt(end_dt, bar_count, data_frequency)

    frames = []
    for asset in assets:
        bundle_df = pd.DataFrame(
            data=dict(bundle_price=bundle_series[asset]),
            index=bundle_series[asset].index)

        exchange_series = exchange.get_series_from_candles(
            candles=candles[asset],
            start_dt=start_dt,
            end_dt=end_dt,
            data_frequency=data_frequency,
            field='close')
        exchange_df = pd.DataFrame(
            data=dict(exchange_price=exchange_series),
            index=exchange_series.index)

        df = exchange_df.join(bundle_df, how='left')
        df['last_traded'] = df.index
        df['asset'] = asset.symbol
        df.set_index(['asset', 'last_traded'], inplace=True)

        frames.append(df)

    df = pd.concat(frames)
    print('\n' + df_to_string(df))
    pass
def test_ingest_minute_all(self):
    exchange_name = 'bitfinex'

    # start = pd.to_datetime('2017-09-01', utc=True)
    start = pd.to_datetime('2017-10-01', utc=True)
    end = pd.to_datetime('2017-10-05', utc=True)

    exchange_bundle = ExchangeBundle(get_exchange(exchange_name))
    log.info('ingesting exchange bundle {}'.format(exchange_name))
    exchange_bundle.ingest(data_frequency='minute',
                           exclude_symbols=None,
                           start=start,
                           end=end,
                           show_progress=True)
    pass
def _build_exchanges_dict(exchange, live, simulate_orders, base_currency):
    exchange_name = exchange
    if exchange_name is None:
        raise ValueError('Please specify at least one exchange.')

    exchange_list = [x.strip().lower() for x in exchange.split(',')]
    exchanges = {
        exchange_name: get_exchange(
            exchange_name=exchange_name,
            base_currency=base_currency,
            must_authenticate=(live and not simulate_orders))
        for exchange_name in exchange_list
    }
    return exchanges
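# Hedged usage sketch for _build_exchanges_dict above. The exchange names
# and currency are illustrative assumptions, not values from the source;
# with live=False and simulate_orders=True, no authentication is required.
exchanges = _build_exchanges_dict(
    exchange='bitfinex,poloniex',
    live=False,
    simulate_orders=True,
    base_currency='usd',
)
# -> {'bitfinex': <Exchange ...>, 'poloniex': <Exchange ...>}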
def test_validate_bundles(self):
    # exchange_population = 3
    asset_population = 3
    data_frequency = random.choice(['minute'])

    # bundle = 'dailyBundle' if data_frequency == 'daily' \
    #     else 'minuteBundle'
    # exchanges = select_random_exchanges(
    #     population=exchange_population,
    #     features=[bundle],
    # )  # Type: list[Exchange]
    exchanges = [get_exchange('poloniex', skip_init=True)]
    data_portal = TestSuiteBundle.get_data_portal(exchanges)

    for exchange in exchanges:
        exchange.init()
        frequencies = exchange.get_candle_frequencies(data_frequency)
        freq = random.sample(frequencies, 1)[0]
        bar_count = random.randint(1, 10)

        assets = select_random_assets(
            exchange.assets, asset_population
        )
        end_dt = None
        for asset in assets:
            attribute = 'end_{}'.format(data_frequency)
            asset_end_dt = getattr(asset, attribute)
            if end_dt is None or asset_end_dt < end_dt:
                end_dt = asset_end_dt

        end_dt = end_dt + timedelta(minutes=3)
        dt_range = pd.date_range(
            end=end_dt, periods=bar_count, freq=freq
        )
        self.compare_bundle_with_exchange(
            exchange=exchange,
            assets=assets,
            end_dt=dt_range[-1],
            bar_count=bar_count,
            freq=freq,
            data_frequency=data_frequency,
            data_portal=data_portal,
        )
    pass
def main_bundle_to_csv(self):
    exchange_name = 'poloniex'
    data_frequency = 'minute'

    exchange = get_exchange(exchange_name)
    asset = exchange.get_asset('eth_btc')

    start_dt = pd.to_datetime('2016-5-31', utc=True)
    end_dt = pd.to_datetime('2016-6-1', utc=True)

    self._bundle_to_csv(asset=asset,
                        exchange_name=exchange.name,
                        data_frequency=data_frequency,
                        filename='{}_{}_{}'.format(exchange_name,
                                                   data_frequency,
                                                   asset.symbol),
                        start_dt=start_dt,
                        end_dt=end_dt)
def test_ingest_daily(self):
    exchange_name = 'bitfinex'
    data_frequency = 'minute'
    include_symbols = 'neo_btc'

    # exchange_name = 'poloniex'
    # data_frequency = 'daily'
    # include_symbols = 'eth_btc'

    # start = pd.to_datetime('2017-1-1', utc=True)
    # end = pd.to_datetime('2017-10-16', utc=True)
    # periods = get_periods_range(start, end, data_frequency)

    start = None
    end = None

    exchange = get_exchange(exchange_name)
    exchange_bundle = ExchangeBundle(exchange)

    log.info('ingesting exchange bundle {}'.format(exchange_name))
    exchange_bundle.ingest(data_frequency=data_frequency,
                           include_symbols=include_symbols,
                           exclude_symbols=None,
                           start=start,
                           end=end,
                           show_progress=True)

    symbols = include_symbols.split(',')
    assets = []
    for pair_symbol in symbols:
        assets.append(exchange.get_asset(pair_symbol))

    reader = exchange_bundle.get_reader(data_frequency)
    start_dt = reader.first_trading_day
    end_dt = reader.last_available_dt

    if data_frequency == 'daily':
        end_dt = end_dt - pd.Timedelta(hours=23, minutes=59)

    for asset in assets:
        arrays = reader.load_raw_arrays(sids=[asset.sid],
                                        fields=['close'],
                                        start_dt=start_dt,
                                        end_dt=end_dt)
        print('found {} rows for {} ingestion\n{}'.format(
            len(arrays[0]), asset.symbol, arrays[0]))
    pass
def bundle_to_csv(self):
    exchange_name = 'poloniex'
    data_frequency = 'minute'
    period = '2017-01'
    symbol = 'eth_btc'

    exchange = get_exchange(exchange_name)
    asset = exchange.get_asset(symbol)

    path = get_bcolz_chunk(exchange_name=exchange.name,
                           symbol=asset.symbol,
                           data_frequency=data_frequency,
                           period=period)
    self._bundle_to_csv(asset=asset,
                        exchange_name=exchange.name,
                        data_frequency=data_frequency,
                        path=path,
                        filename=period)
    pass
def test_validate_bundles(self):
    # exchange_population = 3
    asset_population = 3
    data_frequency = random.choice(['minute', 'daily'])

    # bundle = 'dailyBundle' if data_frequency == 'daily' \
    #     else 'minuteBundle'
    # exchanges = select_random_exchanges(
    #     population=exchange_population,
    #     features=[bundle],
    # )  # Type: list[Exchange]
    exchanges = [get_exchange('bitfinex', skip_init=True)]
    data_portal = TestSuiteBundle.get_data_portal(
        [exchange.name for exchange in exchanges])

    for exchange in exchanges:
        exchange.init()
        frequencies = exchange.get_candle_frequencies(data_frequency)
        freq = random.sample(frequencies, 1)[0]
        bar_count = random.randint(1, 10)

        assets = select_random_assets(exchange.assets, asset_population)
        end_dt = None
        for asset in assets:
            attribute = 'end_{}'.format(data_frequency)
            asset_end_dt = getattr(asset, attribute)
            if end_dt is None or asset_end_dt < end_dt:
                end_dt = asset_end_dt

        dt_range = pd.date_range(end=end_dt, periods=bar_count, freq=freq)
        self.compare_bundle_with_exchange(
            exchange=exchange,
            assets=assets,
            end_dt=dt_range[-1],
            bar_count=bar_count,
            freq=freq,
            data_frequency=data_frequency,
            data_portal=data_portal,
        )
    pass
def ingest(self, data_frequency, include_symbols=None, exclude_symbols=None,
           start=None, end=None, csv=None, show_progress=True,
           show_breakdown=True, show_report=True):
    """
    Ingest data based on the specified parameters.

    Parameters
    ----------
    data_frequency: str
    include_symbols: str
    exclude_symbols: str
    start: pd.Timestamp
    end: pd.Timestamp
    show_progress: bool
    """
    if csv is not None:
        self.ingest_csv(csv, data_frequency)
    else:
        if self.exchange is None:
            # Avoid circular dependencies
            from catalyst.exchange.utils.factory import get_exchange
            self.exchange = get_exchange(self.exchange_name)

        assets = get_assets(self.exchange, include_symbols, exclude_symbols)
        for frequency in data_frequency.split(','):
            self.ingest_assets(assets=assets,
                               data_frequency=frequency,
                               start_dt=start,
                               end_dt=end,
                               show_progress=show_progress,
                               show_breakdown=show_breakdown,
                               show_report=show_report)
def test_ingest_exchange(self):
    # exchange_name = 'bitfinex'
    # data_frequency = 'daily'
    # include_symbols = 'neo_btc,bch_btc,eth_btc'

    exchange_name = 'bitfinex'
    data_frequency = 'minute'

    exchange = get_exchange(exchange_name)
    exchange_bundle = ExchangeBundle(exchange)

    log.info('ingesting exchange bundle {}'.format(exchange_name))
    exchange_bundle.ingest(data_frequency=data_frequency,
                           include_symbols=None,
                           exclude_symbols=None,
                           start=None,
                           end=None,
                           show_progress=True)
    pass
def test_validate_last_candle(self):
    # exchange_population = 3
    asset_population = 3
    data_frequency = random.choice(['minute'])

    # bundle = 'dailyBundle' if data_frequency == 'daily' \
    #     else 'minuteBundle'
    # exchanges = select_random_exchanges(
    #     population=exchange_population,
    #     features=[bundle],
    # )  # Type: list[Exchange]
    exchanges = [get_exchange('poloniex', skip_init=True)]
    data_portal = TestSuiteBundle.get_data_portal(exchanges)

    for exchange in exchanges:
        exchange.init()
        frequencies = exchange.get_candle_frequencies(data_frequency)
        freq = random.sample(frequencies, 1)[0]

        assets = select_random_assets(
            exchange.assets, asset_population
        )
        end_dt = None
        for asset in assets:
            attribute = 'end_{}'.format(data_frequency)
            asset_end_dt = getattr(asset, attribute)
            if end_dt is None or asset_end_dt < end_dt:
                end_dt = asset_end_dt

        end_dt = end_dt + timedelta(minutes=3)
        self.compare_current_with_last_candle(
            exchange=exchange,
            assets=assets,
            end_dt=end_dt,
            freq=freq,
            data_frequency=data_frequency,
            data_portal=data_portal,
        )
    pass
def load_adjusted_array(self, columns, dates, assets, mask):
    # load_adjusted_array is called with dates on which the user's algo
    # will be shown data, which means we need to return the data that
    # would be known at the start of each date. We assume that the latest
    # data known on day N is the data from day (N - 1), so we shift all
    # query dates back by a day.
    start_date, end_date = _shift_dates(
        self._all_sessions, dates[0], dates[-1], shift=1,
    )
    colnames = [c.name for c in columns]

    if len(assets) == 0:
        raise ValueError('Pipeline cannot load data without eligible '
                         'assets.')

    exchange_names = []
    for asset in assets:
        if asset.exchange not in exchange_names:
            exchange_names.append(asset.exchange)

    exchange = get_exchange(exchange_names[0])
    reader = exchange.bundle.get_reader(self.data_frequency)
    raw_arrays = reader.load_raw_arrays(
        colnames,
        start_date,
        end_date,
        assets,
    )
    out = {}
    for c, c_raw in zip(columns, raw_arrays):
        out[c] = AdjustedArray(
            c_raw.astype(c.dtype),
            mask,
            {},
            c.missing_value,
        )
    return out
def test_ingest_csv(self):
    data_frequency = 'minute'
    exchange_name = 'bittrex'
    path = '/Users/fredfortier/Dropbox/Enigma/Data/bittrex_bat_eth.csv'

    exchange_bundle = ExchangeBundle(exchange_name)
    exchange_bundle.ingest_csv(path, data_frequency)

    exchange = get_exchange(exchange_name)
    asset = exchange.get_asset('bat_eth')

    start_dt = pd.to_datetime('2017-6-3', utc=True)
    end_dt = pd.to_datetime('2017-8-3 19:24', utc=True)

    self._bundle_to_csv(asset=asset,
                        exchange_name=exchange.name,
                        data_frequency=data_frequency,
                        filename='{}_{}_{}'.format(exchange_name,
                                                   data_frequency,
                                                   asset.symbol),
                        start_dt=start_dt,
                        end_dt=end_dt)
    pass
def load_crypto_market_data(trading_day=None, trading_days=None,
                            bm_symbol=None, bundle=None, bundle_data=None,
                            environ=None, exchange=None, start_dt=None,
                            end_dt=None):
    if trading_day is None:
        trading_day = get_calendar('OPEN').trading_day

    # TODO: consider making configurable
    bm_symbol = 'btc_usd'

    # if trading_days is None:
    #     trading_days = get_calendar('OPEN').schedule

    # if start_dt is None:
    start_dt = get_calendar('OPEN').first_trading_session

    if end_dt is None:
        end_dt = pd.Timestamp.utcnow()

    # We expect to have benchmark and treasury data that's current up until
    # **two** full trading days prior to the most recently completed
    # trading day.
    # Example:
    # On Thu Oct 22 2015, the previous completed trading day is Wed Oct 21.
    # However, data for Oct 21 doesn't become available until the early
    # morning hours of Oct 22. This means that there are times on the 22nd
    # at which we cannot reasonably expect to have data for the 21st
    # available. To be conservative, we instead expect that at any time on
    # the 22nd, we can download data for Tuesday the 20th, which is two
    # full trading days prior to the date on which we're running a test.

    # We'll attempt to download new data if the latest entry in our cache
    # is before this date.
    '''
    if(bundle_data):
        # If we are using the bundle to retrieve the cryptobenchmark, find
        # the last date for which there is trading data in the bundle
        asset = bundle_data.asset_finder.lookup_symbol(
            symbol=bm_symbol, as_of_date=None)
        ix = bundle_data.daily_bar_reader._last_rows[asset.sid]
        last_date = pd.to_datetime(
            bundle_data.daily_bar_reader._spot_col('day')[ix], unit='s')
    else:
        last_date = trading_days[
            trading_days.get_loc(now, method='ffill') - 2]
    '''
    last_date = trading_days[trading_days.get_loc(end_dt, method='ffill') - 1]

    if exchange is None:
        # This is exceptional, since placing the import at the module scope
        # breaks things and it's only needed here
        from catalyst.exchange.utils.factory import get_exchange
        exchange = get_exchange(
            exchange_name='bitfinex', base_currency='usd'
        )
    exchange.init()

    benchmark_asset = exchange.get_asset(bm_symbol)

    # exchange.get_history_window() already ensures that we have the right
    # data for the right dates
    br = exchange.get_history_window_with_bundle(
        assets=[benchmark_asset],
        end_dt=last_date,
        bar_count=pd.Timedelta(last_date - start_dt).days,
        frequency='1d',
        field='close',
        data_frequency='daily',
        force_auto_ingest=True)
    br.columns = ['close']
    br = br.pct_change(1).iloc[1:]
    br.loc[start_dt] = 0
    br = br.sort_index()

    # Override first_date for treasury data since we have it for many more
    # years and it is independent of crypto data
    first_date_treasury = pd.Timestamp('1990-01-02', tz='UTC')
    tc = ensure_treasury_data(
        bm_symbol,
        first_date_treasury,
        last_date,
        end_dt,
        environ,
    )
    benchmark_returns = br[br.index.slice_indexer(start_dt, last_date)]
    treasury_curves = tc[
        tc.index.slice_indexer(first_date_treasury, last_date)]
    return benchmark_returns, treasury_curves
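# A minimal sketch of the benchmark-returns normalization performed in
# load_crypto_market_data above, using synthetic close prices (all values
# here are illustrative assumptions).
import pandas as pd

idx = pd.date_range('2017-10-01', periods=4, freq='D', tz='UTC')
br = pd.DataFrame({'close': [100.0, 102.0, 101.0, 103.0]}, index=idx)
br = br.pct_change(1).iloc[1:]  # daily returns, dropping the first NaN row
br.loc[idx[0]] = 0              # anchor the series with a zero return
br = br.sort_index()
print(br)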
def ingest_csv(self, path, data_frequency, empty_rows_behavior='strip',
               duplicates_threshold=100):
    """
    Ingest price data from a CSV file.

    Parameters
    ----------
    path: str
    data_frequency: str

    Returns
    -------
    list[str]
        A list of potential problems detected during ingestion.

    """
    log.info('ingesting csv file: {}'.format(path))

    if self.exchange is None:
        # Avoid circular dependencies
        from catalyst.exchange.utils.factory import get_exchange
        self.exchange = get_exchange(self.exchange_name)

    problems = []
    df = pd.read_csv(
        path,
        header=0,
        sep=',',
        dtype=dict(
            symbol=np.object_,
            last_traded=np.object_,
            open=np.float64,
            high=np.float64,
            low=np.float64,
            close=np.float64,
            volume=np.float64
        ),
        parse_dates=['last_traded'],
        index_col=None
    )
    min_start_dt = None
    max_end_dt = None

    symbols = df['symbol'].unique()

    # Apply the timezone before creating an index for simplicity
    df['last_traded'] = df['last_traded'].dt.tz_localize(pytz.UTC)
    df.set_index(['symbol', 'last_traded'], drop=True, inplace=True)

    assets = dict()
    for symbol in symbols:
        start_dt = df.index.get_level_values(1).min()
        end_dt = df.index.get_level_values(1).max()
        end_dt_key = 'end_{}'.format(data_frequency)

        market = self.exchange.get_market(symbol)
        if market is None:
            raise ValueError('symbol not available in the exchange.')

        params = dict(
            exchange=self.exchange.name,
            data_source='local',
            exchange_symbol=market['id'],
        )
        mixin_market_params(self.exchange_name, params, market)

        asset_def = self.exchange.get_asset_def(market, True)
        if asset_def is not None:
            params['symbol'] = asset_def['symbol']

            params['start_date'] = asset_def['start_date'] \
                if asset_def['start_date'] < start_dt else start_dt

            params['end_date'] = asset_def[end_dt_key] \
                if asset_def[end_dt_key] > end_dt else end_dt

            params['end_daily'] = end_dt \
                if data_frequency == 'daily' else asset_def['end_daily']

            params['end_minute'] = end_dt \
                if data_frequency == 'minute' else asset_def['end_minute']
        else:
            params['symbol'] = get_catalyst_symbol(market)

            params['end_daily'] = end_dt \
                if data_frequency == 'daily' else 'N/A'
            params['end_minute'] = end_dt \
                if data_frequency == 'minute' else 'N/A'

        if min_start_dt is None or start_dt < min_start_dt:
            min_start_dt = start_dt

        if max_end_dt is None or end_dt > max_end_dt:
            max_end_dt = end_dt

        asset = TradingPair(**params)
        assets[market['id']] = asset

    save_exchange_symbols(self.exchange_name, assets, True)

    writer = self.get_writer(
        start_dt=min_start_dt.replace(hour=0, minute=0),
        end_dt=max_end_dt.replace(hour=23, minute=59),
        data_frequency=data_frequency
    )

    for symbol in assets:
        # Here the symbol is the market['id']
        asset = assets[symbol]
        ohlcv_df = df.loc[
            (df.index.get_level_values(0) == asset.symbol)
        ]  # type: pd.DataFrame
        ohlcv_df.index = ohlcv_df.index.droplevel(0)

        # Use the aggregate range computed above so the periods stay
        # aligned with the writer (rather than relying on loop variables
        # left over from the previous loop).
        period_start = min_start_dt.replace(hour=0, minute=0)
        period_end = max_end_dt.replace(hour=23, minute=59)
        periods = self.get_calendar_periods_range(
            period_start, period_end, data_frequency
        )
        # We're not really resampling but ensuring that each frame
        # contains data
        ohlcv_df = ohlcv_df.reindex(periods, method='ffill')
        ohlcv_df['volume'] = ohlcv_df['volume'].fillna(0)

        problems += self.ingest_df(
            ohlcv_df=ohlcv_df,
            data_frequency=data_frequency,
            asset=asset,
            writer=writer,
            empty_rows_behavior=empty_rows_behavior,
            duplicates_threshold=duplicates_threshold
        )
    return filter(partial(is_not, None), problems)
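# The CSV layout expected by ingest_csv, inferred from the dtype mapping
# and parse_dates arguments above (the data row values are illustrative):
#
# symbol,last_traded,open,high,low,close,volume
# bat_eth,2017-06-03 00:00:00,0.000880,0.000903,0.000871,0.000892,1205.0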
def _run(handle_data, initialize, before_trading_start, analyze, algofile,
         algotext, defines, data_frequency, capital_base, data, bundle,
         bundle_timestamp, start, end, output, print_algo, local_namespace,
         environ, live, exchange, algo_namespace, base_currency, live_graph,
         analyze_live, simulate_orders, stats_output):
    """Run a backtest for the given algorithm.

    This is shared between the cli and :func:`catalyst.run_algo`.
    """
    if algotext is not None:
        if local_namespace:
            ip = get_ipython()  # noqa
            namespace = ip.user_ns
        else:
            namespace = {}

        for assign in defines:
            try:
                name, value = assign.split('=', 2)
            except ValueError:
                raise ValueError(
                    'invalid define %r, should be of the form name=value' %
                    assign,
                )
            try:
                # evaluate in the same namespace so names may refer to
                # each other
                namespace[name] = eval(value, namespace)
            except Exception as e:
                raise ValueError(
                    'failed to execute definition for name %r: %s' %
                    (name, e),
                )
    elif defines:
        raise _RunAlgoError(
            'cannot pass define without `algotext`',
            "cannot pass '-D' / '--define' without '-t' / '--algotext'",
        )
    else:
        namespace = {}
        if algofile is not None:
            algotext = algofile.read()

    if print_algo:
        if PYGMENTS:
            highlight(
                algotext,
                PythonLexer(),
                TerminalFormatter(),
                outfile=sys.stdout,
            )
        else:
            click.echo(algotext)

    mode = 'paper-trading' if simulate_orders else 'live-trading' \
        if live else 'backtest'
    log.info('running algo in {mode} mode'.format(mode=mode))

    exchange_name = exchange
    if exchange_name is None:
        raise ValueError('Please specify at least one exchange.')

    exchange_list = [x.strip().lower() for x in exchange.split(',')]

    exchanges = dict()
    for exchange_name in exchange_list:
        exchanges[exchange_name] = get_exchange(
            exchange_name=exchange_name,
            base_currency=base_currency,
            must_authenticate=(live and not simulate_orders),
            skip_init=True,
        )

    open_calendar = get_calendar('OPEN')

    env = TradingEnvironment(
        load=partial(load_crypto_market_data,
                     environ=environ,
                     start_dt=start,
                     end_dt=end),
        environ=environ,
        exchange_tz='UTC',
        asset_db_path=None  # We don't need an asset db, we have exchanges
    )
    env.asset_finder = ExchangeAssetFinder(exchanges=exchanges)

    def choose_loader(column):
        bound_cols = TradingPairPricing.columns
        if column in bound_cols:
            return ExchangePricingLoader(data_frequency)
        raise ValueError(
            "No PipelineLoader registered for column %s." % column
        )

    if live:
        start = pd.Timestamp.utcnow()

        # TODO: fix the end data.
        end = start + timedelta(hours=8760)

        data = DataPortalExchangeLive(exchanges=exchanges,
                                      asset_finder=env.asset_finder,
                                      trading_calendar=open_calendar,
                                      first_trading_day=pd.to_datetime(
                                          'today', utc=True))

        def fetch_capital_base(exchange, attempt_index=0):
            """
            Fetch the base currency amount required to bootstrap
            the algorithm against the exchange.

            The algorithm cannot continue without this value.

            :param exchange: the targeted exchange
            :param attempt_index:
            :return capital_base: the amount of base currency available
                    for trading
            """
            try:
                log.debug('retrieving capital base in {} to bootstrap '
                          'exchange {}'.format(base_currency,
                                               exchange_name))
                balances = exchange.get_balances()
            except ExchangeRequestError as e:
                if attempt_index < 20:
                    log.warn('could not retrieve balances on {}: {}'.format(
                        exchange.name, e))
                    sleep(5)
                    return fetch_capital_base(exchange, attempt_index + 1)
                else:
                    raise ExchangeRequestErrorTooManyAttempts(
                        attempts=attempt_index, error=e)

            if base_currency in balances:
                base_currency_available = balances[base_currency]['free']
                log.info(
                    'base currency available in the account: {} {}'.format(
                        base_currency_available, base_currency))
                return base_currency_available
            else:
                raise BaseCurrencyNotFoundError(base_currency=base_currency,
                                                exchange=exchange_name)

        if not simulate_orders:
            for exchange_name in exchanges:
                exchange = exchanges[exchange_name]
                balance = fetch_capital_base(exchange)

                if balance < capital_base:
                    raise NotEnoughCapitalError(
                        exchange=exchange_name,
                        base_currency=base_currency,
                        balance=balance,
                        capital_base=capital_base,
                    )

        sim_params = create_simulation_parameters(start=start,
                                                  end=end,
                                                  capital_base=capital_base,
                                                  emission_rate='minute',
                                                  data_frequency='minute')

        # TODO: use the constructor instead
        sim_params._arena = 'live'

        algorithm_class = partial(
            ExchangeTradingAlgorithmLive,
            exchanges=exchanges,
            algo_namespace=algo_namespace,
            live_graph=live_graph,
            simulate_orders=simulate_orders,
            stats_output=stats_output,
            analyze_live=analyze_live,
        )
    elif exchanges:
        # Removed the existing Poloniex fork to keep things simple
        # We can add back the complexity if required.

        # I don't think that we should have arbitrary price data bundles
        # Instead, we should center this data around exchanges.
        # We still need to support bundles for other misc data, but we
        # can handle this later.

        data = DataPortalExchangeBacktest(
            exchange_names=[exchange_name for exchange_name in exchanges],
            asset_finder=None,
            trading_calendar=open_calendar,
            first_trading_day=start,
            last_available_session=end)

        sim_params = create_simulation_parameters(
            start=start,
            end=end,
            capital_base=capital_base,
            data_frequency=data_frequency,
            emission_rate=data_frequency,
        )

        algorithm_class = partial(ExchangeTradingAlgorithmBacktest,
                                  exchanges=exchanges)
    elif bundle is not None:
        bundle_data = load(
            bundle,
            environ,
            bundle_timestamp,
        )

        prefix, connstr = re.split(
            r'sqlite:///',
            str(bundle_data.asset_finder.engine.url),
            maxsplit=1,
        )
        if prefix:
            raise ValueError(
                "invalid url %r, must begin with 'sqlite:///'" %
                str(bundle_data.asset_finder.engine.url),
            )

        env = TradingEnvironment(asset_db_path=connstr, environ=environ)
        first_trading_day = \
            bundle_data.equity_minute_bar_reader.first_trading_day

        data = DataPortal(
            env.asset_finder, open_calendar,
            first_trading_day=first_trading_day,
            equity_minute_reader=bundle_data.equity_minute_bar_reader,
            equity_daily_reader=bundle_data.equity_daily_bar_reader,
            adjustment_reader=bundle_data.adjustment_reader,
        )

    perf = algorithm_class(
        namespace=namespace,
        env=env,
        get_pipeline_loader=choose_loader,
        sim_params=sim_params,
        **{
            'initialize': initialize,
            'handle_data': handle_data,
            'before_trading_start': before_trading_start,
            'analyze': analyze,
        } if algotext is None else {
            'algo_filename': getattr(algofile, 'name', '<algorithm>'),
            'script': algotext,
        }).run(
        data,
        overwrite_sim_params=False,
    )

    if output == '-':
        click.echo(str(perf))
    elif output != os.devnull:  # make the catalyst magic not write any data
        perf.to_pickle(output)

    return perf
def test_daily_data_to_minute_table(self):
    exchange_name = 'poloniex'

    # Switch between daily and minute for testing
    data_frequency = 'daily'
    # data_frequency = 'minute'

    exchange = get_exchange(exchange_name)
    assets = [
        exchange.get_asset('eth_btc'),
        exchange.get_asset('etc_btc'),
    ]

    start = pd.to_datetime('2017-9-1', utc=True)
    end = pd.to_datetime('2017-9-30', utc=True)

    # Preparing the bundle folder
    root = get_exchange_folder(exchange.name)
    path = BUNDLE_NAME_TEMPLATE.format(root=root, frequency=data_frequency)
    ensure_directory(path)

    exchange_bundle = ExchangeBundle(exchange)

    # We are using a BcolzMinuteBarWriter even though the data is daily;
    # each day has a maximum of one bar.
    # I tried setting minutes_per_day to 1 so that it does not create
    # unnecessary bars.
    writer = BcolzExchangeBarWriter(rootdir=path,
                                    data_frequency=data_frequency,
                                    start_session=start,
                                    end_session=end,
                                    write_metadata=True)

    # This will read the daily data in a bundle created by
    # the daily writer. It will write to the minute writer which
    # we are passing.
    # Ingesting a second asset to ensure that multiple chunks
    # don't override each other.
    for asset in assets:
        exchange_bundle.ingest_ctable(asset=asset,
                                      data_frequency=data_frequency,
                                      period='2017',
                                      start_dt=start,
                                      end_dt=end,
                                      writer=writer,
                                      empty_rows_behavior='strip')

    reader = BcolzExchangeBarReader(rootdir=path,
                                    data_frequency=data_frequency)

    # Reading the two assets to ensure that no data was lost
    for asset in assets:
        sid = asset.sid
        daily_values = reader.load_raw_arrays(
            fields=['open', 'high', 'low', 'close', 'volume'],
            start_dt=start,
            end_dt=end,
            sids=[sid],
        )
        print('found {} rows for last ingestion'.format(
            len(daily_values[0])))
    pass
def _run(handle_data, initialize, before_trading_start, analyze, algofile,
         algotext, defines, data_frequency, capital_base, data, bundle,
         bundle_timestamp, start, end, output, print_algo, local_namespace,
         environ, live, exchange, algo_namespace, quote_currency,
         live_graph, analyze_live, simulate_orders, auth_aliases,
         stats_output):
    """Run a backtest for the given algorithm.

    This is shared between the cli and :func:`catalyst.run_algo`.
    """
    # TODO: refactor for more granularity
    if algotext is not None:
        if local_namespace:
            ip = get_ipython()  # noqa
            namespace = ip.user_ns
        else:
            namespace = {}

        for assign in defines:
            try:
                name, value = assign.split('=', 2)
            except ValueError:
                raise ValueError(
                    'invalid define %r, should be of the form name=value' %
                    assign,
                )
            try:
                # evaluate in the same namespace so names may refer to
                # each other
                namespace[name] = eval(value, namespace)
            except Exception as e:
                raise ValueError(
                    'failed to execute definition for name %r: %s' %
                    (name, e),
                )
    elif defines:
        raise _RunAlgoError(
            'cannot pass define without `algotext`',
            "cannot pass '-D' / '--define' without '-t' / '--algotext'",
        )
    else:
        namespace = {}
        if algofile is not None:
            algotext = algofile.read()

    if print_algo:
        if PYGMENTS:
            highlight(
                algotext,
                PythonLexer(),
                TerminalFormatter(),
                outfile=sys.stdout,
            )
        else:
            click.echo(algotext)

    log.info('Catalyst version {}'.format(catalyst.__version__))
    if not DISABLE_ALPHA_WARNING:
        log.warn(ALPHA_WARNING_MESSAGE)
        # sleep(3)

    if live:
        if simulate_orders:
            mode = 'paper-trading'
        else:
            mode = 'live-trading'
    else:
        mode = 'backtest'
    log.info('running algo in {mode} mode'.format(mode=mode))

    exchange_name = exchange
    if exchange_name is None:
        raise ValueError('Please specify at least one exchange.')

    if isinstance(auth_aliases, string_types):
        aliases = auth_aliases.split(',')
        if len(aliases) < 2 or len(aliases) % 2 != 0:
            raise ValueError(
                'the `auth_aliases` parameter must contain an even list '
                'of comma-delimited values. For example, '
                '"binance,auth2" or "binance,auth2,bittrex,auth2".'
            )
        auth_aliases = dict(zip(aliases[::2], aliases[1::2]))

    exchange_list = [x.strip().lower() for x in exchange.split(',')]
    exchanges = dict()
    for name in exchange_list:
        if auth_aliases is not None and name in auth_aliases:
            auth_alias = auth_aliases[name]
        else:
            auth_alias = None

        exchanges[name] = get_exchange(
            exchange_name=name,
            quote_currency=quote_currency,
            must_authenticate=(live and not simulate_orders),
            skip_init=True,
            auth_alias=auth_alias,
        )

    open_calendar = get_calendar('OPEN')

    env = TradingEnvironment(
        load=partial(
            load_crypto_market_data,
            environ=environ,
            start_dt=start,
            end_dt=end
        ),
        environ=environ,
        exchange_tz='UTC',
        asset_db_path=None  # We don't need an asset db, we have exchanges
    )
    env.asset_finder = ExchangeAssetFinder(exchanges=exchanges)

    def choose_loader(column):
        bound_cols = TradingPairPricing.columns
        if column in bound_cols:
            return ExchangePricingLoader(data_frequency)
        raise ValueError(
            "No PipelineLoader registered for column %s." % column
        )

    if live:
        # TODO: fix the start data.
        # is_start checks if a start date was specified by user
        # needed for live clock
        is_start = True
        if start is None:
            start = pd.Timestamp.utcnow()
            is_start = False
        elif start:
            assert pd.Timestamp.utcnow() <= start, \
                "specified start date is in the past."
        elif start and end:
            assert start < end, "start date is later than end date."

        # TODO: fix the end data.
        # is_end checks if an end date was specified by user
        # needed for live clock
        is_end = True
        if end is None:
            end = start + timedelta(hours=8760)
            is_end = False

        data = DataPortalExchangeLive(
            exchanges=exchanges,
            asset_finder=env.asset_finder,
            trading_calendar=open_calendar,
            first_trading_day=pd.to_datetime('today', utc=True)
        )
        sim_params = create_simulation_parameters(
            start=start,
            end=end,
            capital_base=capital_base,
            emission_rate='minute',
            data_frequency='minute'
        )

        # TODO: use the constructor instead
        sim_params._arena = 'live'

        algorithm_class = partial(
            ExchangeTradingAlgorithmLive,
            exchanges=exchanges,
            algo_namespace=algo_namespace,
            live_graph=live_graph,
            simulate_orders=simulate_orders,
            stats_output=stats_output,
            analyze_live=analyze_live,
            start=start,
            is_start=is_start,
            end=end,
            is_end=is_end,
        )
    elif exchanges:
        # Removed the existing Poloniex fork to keep things simple
        # We can add back the complexity if required.

        # I don't think that we should have arbitrary price data bundles
        # Instead, we should center this data around exchanges.
        # We still need to support bundles for other misc data, but we
        # can handle this later.
        if (start and start != pd.tslib.normalize_date(start)) or \
                (end and end != pd.tslib.normalize_date(end)):
            # todo: add to Sim_Params the option to
            # start & end at specific times
            log.warn(
                "Catalyst currently starts and ends on the start and "
                "end of the dates specified, respectively. We hope to "
                "modify this and support specific times in a future "
                "release."
            )

        data = DataPortalExchangeBacktest(
            exchange_names=[ex_name for ex_name in exchanges],
            asset_finder=None,
            trading_calendar=open_calendar,
            first_trading_day=start,
            last_available_session=end
        )

        sim_params = create_simulation_parameters(
            start=start,
            end=end,
            capital_base=capital_base,
            data_frequency=data_frequency,
            emission_rate=data_frequency,
        )

        algorithm_class = partial(
            ExchangeTradingAlgorithmBacktest, exchanges=exchanges
        )
    elif bundle is not None:
        bundle_data = load(
            bundle,
            environ,
            bundle_timestamp,
        )

        prefix, connstr = re.split(
            r'sqlite:///',
            str(bundle_data.asset_finder.engine.url),
            maxsplit=1,
        )
        if prefix:
            raise ValueError(
                "invalid url %r, must begin with 'sqlite:///'" %
                str(bundle_data.asset_finder.engine.url),
            )

        env = TradingEnvironment(asset_db_path=connstr, environ=environ)
        first_trading_day = \
            bundle_data.equity_minute_bar_reader.first_trading_day

        data = DataPortal(
            env.asset_finder, open_calendar,
            first_trading_day=first_trading_day,
            equity_minute_reader=bundle_data.equity_minute_bar_reader,
            equity_daily_reader=bundle_data.equity_daily_bar_reader,
            adjustment_reader=bundle_data.adjustment_reader,
        )

    perf = algorithm_class(
        namespace=namespace,
        env=env,
        get_pipeline_loader=choose_loader,
        sim_params=sim_params,
        **{
            'initialize': initialize,
            'handle_data': handle_data,
            'before_trading_start': before_trading_start,
            'analyze': analyze,
        } if algotext is None else {
            'algo_filename': getattr(algofile, 'name', '<algorithm>'),
            'script': algotext,
        }
    ).run(
        data,
        overwrite_sim_params=False,
    )

    if output == '-':
        click.echo(str(perf))
    elif output != os.devnull:  # make the catalyst magic not write any data
        perf.to_pickle(output)

    return perf
from catalyst.exchange.utils.factory import get_exchange

for exchange_name in ["gdax", "binance"]:
    exchange = get_exchange(exchange_name)
    assets = exchange.get_assets()
    print(exchange.tickers(assets[0:2]))
    print(exchange.tickers([assets[0]]))
"""
@author: cheng.li
"""

import io
import pandas as pd
import numpy as np
import sqlalchemy as sa
from catalyst.exchange.utils.factory import get_exchange
import catalyst.exchange.exchange_bcolz as bz

engine = sa.create_engine(
    'postgresql+psycopg2://postgres:[email protected]/crypto')

exchange_name = 'bitfinex'
exchange = get_exchange(exchange_name=exchange_name,
                        quote_currency='usd',
                        must_authenticate=False,
                        skip_init=True,
                        auth_alias=None)
reader = bz.BcolzExchangeBarReader(
    rootdir=r'C:\Users\wegamekinglc\.catalyst\data\exchanges'
            r'\{0}\minute_bundle'.format(exchange_name),
    data_frequency='minute')

exchange.init()

assets = exchange.assets
sids = [a.sid for a in assets]

start_dt = pd.to_datetime('2017-07-01')
end_dt = pd.to_datetime('2018-07-08 23:59:00')
periods = pd.date_range(start_dt, end_dt, freq='T') + pd.Timedelta(minutes=1)
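# Hedged continuation of the script above: load close prices for the first
# asset over the computed range. The keyword arguments mirror the
# load_raw_arrays calls shown elsewhere in this section; whether the naive
# timestamps above need UTC localization first is an assumption left to the
# reader's bundle configuration.
arrays = reader.load_raw_arrays(
    fields=['close'],
    start_dt=start_dt,
    end_dt=end_dt,
    sids=sids[:1],
)
print('loaded {} rows'.format(len(arrays[0])))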
def update_symbols_file(self, assets):
    if self.exchange is None:
        # Avoid circular dependencies
        from catalyst.exchange.utils.factory import get_exchange
        self.exchange = get_exchange(self.exchange_name)

    # Check if the symbols.json file was updated today
    try:
        root = get_exchange_folder(self.exchange_name)
        timestamp = os.path.getmtime(os.path.join(root, 'symbols.json'))
        file_dt = pd.to_datetime(timestamp, unit='s', utc=True)
    except FileNotFoundError:
        file_dt = None

    log.info("updating symbols.json")

    try:
        existing_symbols_defs = get_exchange_symbols(self.exchange_name)
    except ExchangeSymbolsNotFound:
        existing_symbols_defs = {}
    self.exchange.api.load_markets()

    results = {}
    for asset in assets:
        if asset.symbol in INGEST_PAIRS_INCLUDED or \
                self._matches_included_quote(asset.symbol):
            if asset.exchange_symbol in existing_symbols_defs:
                existing_def = existing_symbols_defs[asset.exchange_symbol]
                if self.exchange.api.markets[
                        asset.asset_name.replace(' ', '')]['active']:
                    end_date = pd.Timestamp.utcnow().floor('1D')
                    existing_def['end_minute'] = end_date
                    existing_def['end_daily'] = end_date
                    log.debug("updated {} symbol -> [still active]",
                              asset.symbol)
                    results[asset.exchange_symbol] = existing_def
                    continue
                elif file_dt is not None and pd.Timestamp(
                        existing_def['end_daily']) < file_dt.floor('1D'):
                    log.debug("updated {} symbol -> [already delisted]",
                              asset.symbol)
                    results[asset.exchange_symbol] = existing_def
                    continue

            # Either the symbol is new, or it has been delisted since
            # the last update
            try:
                end_results = self.exchange.get_candles(
                    freq='1H',
                    assets=asset,
                    start_dt=None,
                    end_dt=None,
                    bar_count=1,
                    keep_empty_start=True)
                if len(end_results) == 0:
                    raise Exception(
                        "no end candles found for {}".format(asset.symbol))
                last_date = end_results[-1]['last_traded'].floor('1D')

                start_results = self.exchange.get_candles(
                    freq='1D',
                    assets=asset,
                    start_dt=pd.Timestamp("2009-01-01", tz='utc'),
                    end_dt=None,
                    bar_count=1,
                    keep_empty_start=True)
                if len(start_results) == 0:
                    raise Exception(
                        "no start candles found for {}".format(
                            asset.symbol))
                first_date = start_results[-1]['last_traded'].floor('1D')

                symbol_dates = {
                    'end_minute': last_date,
                    'end_daily': last_date,
                    'start_date': first_date,
                    'symbol': asset.symbol
                }
                if last_date != pd.Timestamp.utcnow().floor('1D'):
                    log.info("updated {} symbol [new delisted]",
                             asset.symbol)
                else:
                    log.info("updated {} symbol [new listed]", asset.symbol)
                results[asset.exchange_symbol] = symbol_dates
            except Exception:
                log.exception("error building symbol dates for {}".format(
                    asset.symbol))

    save_exchange_symbols_dicts(self.exchange_name, results)
def test_orders(self):
    population = 3
    quote_currency = 'eth'
    order_amount = 0.1

    # exchanges = select_random_exchanges(
    #     population=population,
    #     features=['fetchOrder'],
    #     is_authenticated=True,
    #     base_currency=quote_currency,
    # )  # Type: list[Exchange]
    exchanges = [
        get_exchange(
            'binance',
            base_currency=quote_currency,
            must_authenticate=True,
        )
    ]

    log_catcher = TestHandler()
    with log_catcher:
        for exchange in exchanges:
            exchange.init()

            assets = exchange.get_assets(quote_currency=quote_currency)
            asset = select_random_assets(assets, 1)[0]
            self.assertIsInstance(asset, TradingPair)

            tickers = exchange.tickers([asset])
            price = tickers[asset]['last_price']

            amount = order_amount / price
            limit_price = price * 0.8

            style = ExchangeLimitOrder(limit_price=limit_price)
            order = exchange.order(
                asset=asset,
                amount=amount,
                style=style,
            )
            sleep(1)

            open_order = exchange.get_order(order.id, asset)
            self.assertEqual(0, open_order.status)

            exchange.cancel_order(open_order, asset)
            sleep(1)

            canceled_order = exchange.get_order(open_order.id, asset)

            warnings = [
                record for record in log_catcher.records
                if record.level == WARNING
            ]
            self.assertEqual(0, len(warnings))
            self.assertEqual(2, canceled_order.status)
            print('tested {exchange} / {symbol}, order: {order}'.format(
                exchange=exchange.name,
                symbol=asset.symbol,
                order=order.id,
            ))
    pass