def create_trading_pair(self, market, asset_def=None, is_local=False):
    """
    Build a TradingPair from an exchange market entry and, when
    available, a stored asset definition.

    Parameters
    ----------
    market: dict[str, Object]
        Market entry. Its 'id' becomes the exchange symbol; its
        'active' flag is consulted only when asset_def is given.
    asset_def: dict[str, Object]
        Optional asset definition. 'symbol' is required; the other
        keys read here are optional.
    is_local: bool
        Selects the 'local' data source instead of 'catalyst'.

    Returns
    -------
    TradingPair

    """
    params = dict(
        exchange=self.name,
        data_source='local' if is_local else 'catalyst',
        exchange_symbol=market['id'],
    )
    mixin_market_params(self.name, params, market)

    # Without a definition only the symbol and a default leverage
    # can be supplied.
    if asset_def is None:
        params['symbol'] = get_catalyst_symbol(market)
        # TODO: add as an optional column
        params['leverage'] = 1.0
        return TradingPair(**params)

    params['symbol'] = asset_def['symbol']
    params['start_date'] = asset_def.get('start_date')
    params['leverage'] = asset_def.get('leverage', 1.0)
    params['asset_name'] = asset_def.get('asset_name')

    if market['active']:
        # Active markets get the same end for every end_* field:
        # today (UTC, floored to the day) plus 1000 days.
        horizon = pd.Timestamp.now('UTC').floor('D') + pd.Timedelta('1000D')
        for key in ('end_date', 'end_daily', 'end_minute'):
            params[key] = horizon
    else:
        params['end_date'] = asset_def.get('end_date')
        # 'N/A' is a placeholder in the stored definition and is
        # mapped to None, like a missing key.
        for key in ('end_daily', 'end_minute'):
            stored = asset_def.get(key)
            params[key] = stored if stored != 'N/A' else None

    return TradingPair(**params)
def load_assets(self):
    """
    Fill the 'assets' attribute with one TradingPair per entry of the
    symbol map returned by fetch_symbol_map().

    Entries are keyed by the exchange specific currency pair symbol;
    the universal symbol is taken from each entry's 'symbol' field.

    Notes
    -----
    No sid is passed to TradingPair here, so no sid mapping is
    maintained by this method.

    This method can be overridden if an exchange offers equivalent data
    via its api.

    """
    def utc_or_none(entry, key):
        # Dates are optional; present values are parsed as UTC.
        if key in entry:
            return pd.to_datetime(entry[key], utc=True)
        return None

    for exchange_symbol, entry in self.fetch_symbol_map().items():
        self.assets[exchange_symbol] = TradingPair(
            symbol=entry['symbol'],
            exchange=self.name,
            start_date=utc_or_none(entry, 'start_date'),
            end_date=utc_or_none(entry, 'end_date'),
            leverage=entry.get('leverage', 1.0),
            asset_name=entry.get('asset_name'),
        )
def ingest_csv(self, path, data_frequency, empty_rows_behavior='strip',
               duplicates_threshold=100):
    """
    Ingest price data from a CSV file.

    The file is read with a header row. The 'symbol' and 'last_traded'
    columns form the index and 'volume' gaps are filled with 0; the
    resulting per-symbol frames are handed to ingest_df().

    Parameters
    ----------
    path: str
        Location of the CSV file.
    data_frequency: str
        'daily' or 'minute'; selects which end_* field is updated.
    empty_rows_behavior: str
        Forwarded unchanged to ingest_df().
    duplicates_threshold: int
        Forwarded unchanged to ingest_df().

    Returns
    -------
    list[str]
        A list of potential problems detected during ingestion. Empty
        when the file holds no data rows.

    Raises
    ------
    ValueError
        If a symbol of the file is unknown to the exchange.

    """
    log.info('ingesting csv file: {}'.format(path))

    if self.exchange is None:
        # Avoid circular dependencies
        from catalyst.exchange.factory import get_exchange
        self.exchange = get_exchange(self.exchange_name)

    df = pd.read_csv(
        path,
        header=0,
        sep=',',
        dtype=dict(
            symbol=np.object_,
            last_traded=np.object_,
            open=np.float64,
            high=np.float64,
            low=np.float64,
            close=np.float64,
            volume=np.float64,
        ),
        parse_dates=['last_traded'],
        index_col=None,
    )

    # A header-only file has nothing to ingest and would otherwise
    # fail further down on the missing date range.
    if df.empty:
        log.warning('no rows found in csv file: {}'.format(path))
        return []

    symbols = df['symbol'].unique()

    # Apply the timezone before creating an index for simplicity
    df['last_traded'] = df['last_traded'].dt.tz_localize(pytz.UTC)
    df.set_index(['symbol', 'last_traded'], drop=True, inplace=True)

    # The date range spans every row of the file, whatever the symbol,
    # so it is computed once instead of once per symbol.
    timestamps = df.index.get_level_values(1)
    start_dt = timestamps.min()
    end_dt = timestamps.max()
    end_dt_key = 'end_{}'.format(data_frequency)

    def widen(stored, observed, earliest):
        # Merge a stored bound with the one observed in the file.
        # None and the 'N/A' placeholder carry no date and cannot be
        # compared with a timestamp, so the observed value wins.
        if stored is None or stored == 'N/A':
            return observed
        if earliest:
            return stored if stored < observed else observed
        return stored if stored > observed else observed

    assets = dict()
    # Exchange id -> symbol as spelled in the CSV. The frame is indexed
    # by the CSV spelling, which need not equal the exchange id.
    csv_symbols = dict()
    for symbol in symbols:
        market = self.exchange.get_market(symbol)
        if market is None:
            raise ValueError('symbol not available in the exchange.')

        params = dict(
            exchange=self.exchange.name,
            data_source='local',
            exchange_symbol=market['id'],
        )
        mixin_market_params(self.exchange_name, params, market)

        asset_def = self.exchange.get_asset_def(market, True)
        if asset_def is not None:
            params['symbol'] = asset_def['symbol']
            params['start_date'] = widen(
                asset_def.get('start_date'), start_dt, True
            )
            params['end_date'] = widen(
                asset_def.get(end_dt_key), end_dt, False
            )
            params['end_daily'] = end_dt \
                if data_frequency == 'daily' else asset_def['end_daily']
            params['end_minute'] = end_dt \
                if data_frequency == 'minute' else asset_def['end_minute']
        else:
            params['symbol'] = self.exchange.get_catalyst_symbol(market)
            # The frequency not covered by this file is marked 'N/A'.
            params['end_daily'] = end_dt \
                if data_frequency == 'daily' else 'N/A'
            params['end_minute'] = end_dt \
                if data_frequency == 'minute' else 'N/A'

        assets[market['id']] = TradingPair(**params)
        csv_symbols[market['id']] = symbol

    save_exchange_symbols(self.exchange_name, assets, True)

    # Stretch the range to whole days for the writer and the calendar.
    period_start = start_dt.replace(hour=0, minute=0)
    period_end = end_dt.replace(hour=23, minute=59)

    writer = self.get_writer(
        start_dt=period_start,
        end_dt=period_end,
        data_frequency=data_frequency,
    )
    periods = self.get_calendar_periods_range(
        period_start, period_end, data_frequency
    )

    problems = []
    for exchange_symbol, asset in assets.items():
        csv_symbol = csv_symbols[exchange_symbol]
        ohlcv_df = df.loc[
            df.index.get_level_values(0) == csv_symbol
        ]  # type: pd.DataFrame
        ohlcv_df.index = ohlcv_df.index.droplevel(0)

        # We're not really resampling but ensuring that each frame
        # contains data
        ohlcv_df = ohlcv_df.reindex(periods, method='ffill')
        ohlcv_df['volume'] = ohlcv_df['volume'].fillna(0)

        problems += self.ingest_df(
            ohlcv_df=ohlcv_df,
            data_frequency=data_frequency,
            asset=asset,
            writer=writer,
            empty_rows_behavior=empty_rows_behavior,
            duplicates_threshold=duplicates_threshold,
        )

    # Materialise the result: a lazy filter object is not the list the
    # contract promises on Python 3.
    return [problem for problem in problems if problem is not None]
def load_assets(self, is_local=False):
    """
    Fill the asset registry with one TradingPair per entry of the
    symbol map returned by fetch_symbol_map(is_local).

    Entries are keyed by the exchange specific currency pair symbol;
    the universal symbol is taken from each entry's 'symbol' field.
    Pairs are stored in 'local_assets' when is_local is set and in
    'assets' otherwise. Nothing is stored, and None is returned, when
    the symbol map cannot be found.

    Notes
    -----
    No sid is passed to TradingPair here, so no sid mapping is
    maintained by this method.

    This method can be overridden if an exchange offers equivalent data
    via its api.

    """
    try:
        symbol_map = self.fetch_symbol_map(is_local)
    except ExchangeSymbolsNotFound:
        return None

    def utc_or_none(entry, key, skip_na=False):
        # Dates are optional. With skip_na the 'N/A' placeholder is
        # treated like a missing key.
        usable = key in entry
        if usable and skip_na:
            usable = entry[key] != 'N/A'
        return pd.to_datetime(entry[key], utc=True) if usable else None

    for exchange_symbol, entry in symbol_map.items():
        pair = TradingPair(
            symbol=entry['symbol'],
            exchange=self.name,
            start_date=utc_or_none(entry, 'start_date'),
            end_date=utc_or_none(entry, 'end_date'),
            leverage=entry.get('leverage', 1.0),
            asset_name=entry.get('asset_name'),
            min_trade_size=entry.get('min_trade_size', 0.0000001),
            end_daily=utc_or_none(entry, 'end_daily', skip_na=True),
            end_minute=utc_or_none(entry, 'end_minute', skip_na=True),
            exchange_symbol=exchange_symbol,
        )

        registry = self.local_assets if is_local else self.assets
        registry[exchange_symbol] = pair