def _get_symbols_last_trade_value(self, symbols):
    """
    Query last_trade in parallel for multiple symbols and
    return in dict.

    symbols: list[str]
    """
    @skip_http_error((404, 504))
    def fetch(symbol):
        return self._api.get_latest_trade(symbol)

    return parallelize(fetch)(symbols)
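# The helpers used above (`parallelize`, `skip_http_error`) are project
# utilities whose implementation is not shown here. Below is a minimal
# sketch, assuming `parallelize` maps a function over its inputs with a
# thread pool and returns a dict of {input: result}; the name
# `parallelize_sketch` is hypothetical and only for illustration.

from concurrent.futures import ThreadPoolExecutor


def parallelize_sketch(fn, workers=10):
    """Hypothetical stand-in: call fn(item) concurrently for every item
    and return a dict mapping item -> result."""
    def wrapper(items):
        with ThreadPoolExecutor(max_workers=workers) as pool:
            results = list(pool.map(fn, items))
        return dict(zip(items, results))
    return wrapper


# usage sketch (assumes an Alpaca REST client named `api`):
# trades = parallelize_sketch(lambda s: api.get_latest_trade(s))(
#     ['AAPL', 'MSFT'])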
def can_trade(self, assets):
    """
    For the given asset or iterable of assets, returns true if all of
    the following are true:
    1) the asset is alive for the session of the current simulation time
       (if current simulation time is not a market minute, we use the
       next session)
    2) (if we are in minute mode) the asset's exchange is open at the
       current simulation time or at the simulation calendar's next
       market minute
    3) there is a known last price for the asset.

    Notes
    -----
    The second condition above warrants some further explanation.
    - If the asset's exchange calendar is identical to the simulation
      calendar, then this condition always returns True.
    - If there are market minutes in the simulation calendar outside of
      this asset's exchange's trading hours (for example, if the
      simulation is running on the CME calendar but the asset is MSFT,
      which trades on the NYSE), during those minutes, this condition
      will return false (for example, 3:15 am Eastern on a weekday,
      during which the CME is open but the NYSE is closed).

    Parameters
    ----------
    assets: Asset or iterable of assets

    Returns
    -------
    can_trade : bool or pd.Series[bool] indexed by asset.
    """
    dt = self.datetime

    if self._adjust_minutes:
        adjusted_dt = self._get_current_minute()
    else:
        adjusted_dt = dt

    data_portal = self.data_portal

    if isinstance(assets, Asset):
        return self._can_trade_for_asset(
            assets, dt, adjusted_dt, data_portal)
    else:
        def fetch(asset):
            return self._can_trade_for_asset(
                asset, dt, adjusted_dt, data_portal)
        tradeable = parallelize(fetch)(assets)
        return pd.Series(data=tradeable, index=assets, dtype=bool)
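# Illustration only of the scalar-vs-Series return convention used by
# `can_trade` above, reproduced with plain pandas; Asset objects are
# replaced by plain ticker strings for brevity.

import pandas as pd

assets = ['AAPL', 'MSFT', 'GLD']
tradeable = {'AAPL': True, 'MSFT': True, 'GLD': False}

single = tradeable['AAPL']                                   # bool for a single asset
multi = pd.Series(data=tradeable, index=assets, dtype=bool)  # Series[bool] for an iterable
print(single)
print(multi)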
def _symbol_trades(self, symbols):
    '''
    Query last_trade in parallel for multiple symbols and
    return in dict.

    symbols: list[str]

    return: dict[str -> polygon.Trade]
    '''
    @skip_http_error((404, 504))
    def fetch(symbol):
        return self._api.polygon.last_trade(symbol)

    return parallelize(fetch)(symbols)
def _symbol_bars(
        self,
        symbols,
        size,
        _from=None,
        to=None,
        limit=None):
    '''
    Query historic_agg either minute or day in parallel
    for multiple symbols, and return in dict.

    symbols: list[str]
    size:    str ('day', 'minute')
    _from:   str or pd.Timestamp
    to:      str or pd.Timestamp
    limit:   str or int

    return: dict[str -> pd.DataFrame]
    '''
    assert size in ('day', 'minute')

    # temp workaround: request twice as many bars, since some of them
    # are dropped when masking to market hours below
    query_limit = limit
    if query_limit is not None:
        query_limit *= 2

    @skip_http_error((404, 504))
    def fetch(symbol):
        df = self._api.polygon.historic_agg(
            size, symbol, _from, to, query_limit).df

        # zipline expects right-labeled bars; the API returns
        # left-labeled bars (beginning of bucket), so shift minute bars
        # forward by one minute
        if size == 'minute':
            df.index += pd.Timedelta('1min')

        # mask out bars outside market hours
        mask = self._cal.minutes_in_range(
            df.index[0], df.index[-1],
        ).tz_convert(NY)
        df = df.reindex(mask)

        if limit is not None:
            df = df.iloc[-limit:]

        return df

    return parallelize(fetch)(symbols)
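# Illustration of the label shift and market-hours masking done inside
# `fetch` above, using plain pandas instead of the project's trading
# calendar; the 09:31-16:00 range below is a simplified stand-in for
# `self._cal.minutes_in_range(...)`.

import pandas as pd

idx = pd.date_range('2021-01-04 09:29', '2021-01-04 09:35',
                    freq='1min', tz='America/New_York')
df = pd.DataFrame({'close': range(len(idx))}, index=idx)

# API bars are labeled by bucket start; zipline wants the bucket end
df.index += pd.Timedelta('1min')

# keep only regular-session minutes (right-labeled: 09:31 .. 16:00)
session = pd.date_range('2021-01-04 09:31', '2021-01-04 16:00',
                        freq='1min', tz='America/New_York')
mask = session[(session >= df.index[0]) & (session <= df.index[-1])]
df = df.reindex(mask)
print(df)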
def _get_symbols_last_trade_value(self, symbols):
    """
    Query last_trade in parallel for multiple symbols and
    return in dict.

    symbols: list[str]

    return: dict[str -> polygon.Trade or alpaca.Trade]
    """
    @skip_http_error((404, 504))
    def fetch(symbol):
        if self._use_polygon:
            return self._api.polygon.last_trade(symbol)
        else:
            return self._api.get_last_trade(symbol)

    return parallelize(fetch)(symbols)
def _symbol_bars(
        self,
        symbols,
        size,
        _from=None,
        to=None,
        limit=None):
    '''
    Query historic_agg_v2 either minute or day in parallel
    for multiple symbols, and return in dict.

    symbols: list[str]
    size:    str ('day', 'minute')
    _from:   str or pd.Timestamp
    to:      str or pd.Timestamp
    limit:   str or int

    return: dict[str -> pd.DataFrame]
    '''
    assert size in ('day', 'minute')

    if not (_from or to):
        to = pd.to_datetime('now', utc=True).tz_convert(
            'America/New_York')

    if not (_from and to) and limit:
        # temp workaround: request twice as many bars, since some of
        # them are dropped when masking to market hours below
        query_limit = limit
        if query_limit is not None:
            query_limit *= 2
        if _from:
            if size == 'day':
                to = _from + timedelta(days=query_limit + 1)
            else:
                to = _from + timedelta(minutes=query_limit + 1)
        else:
            if size == 'day':
                _from = to - timedelta(days=query_limit + 1)
            else:
                _from = to - timedelta(minutes=query_limit + 1)

    @skip_http_error((404, 504))
    def fetch(symbol):
        df = self._api.polygon.historic_agg_v2(
            symbol, 1, size,
            int(_from.timestamp()) * 1000,
            int(to.timestamp()) * 1000
        ).df

        # rename Polygon's v2 agg fields to their full names
        df = df.rename(index=str, columns={
            't': 'timestamp',
            'o': 'open',
            'h': 'high',
            'l': 'low',
            'c': 'close',
            'v': 'volume',
        })

        # convert timestamps to datetimes;
        # astype is necessary to deal with an empty result
        df.index = pd.to_datetime(
            df.index.astype('str'),
            utc=True,
        ).tz_convert('America/New_York')
        df.index.name = 'timestamp'

        # zipline expects right-labeled bars; the API returns
        # left-labeled bars (beginning of bucket), so shift minute bars
        # forward by one minute
        if size == 'minute':
            df.index += pd.Timedelta('1min')

        if not df.empty:
            # mask out bars outside market hours
            mask = self._cal.minutes_in_range(
                df.index[0], df.index[-1],
            ).tz_convert(NY)
            df = df.reindex(mask)

            if limit is not None:
                df = df.iloc[-limit:]

        return df

    return parallelize(fetch)(symbols)
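# Illustration of the index conversion done above, assuming the
# `historic_agg_v2(...).df` index comes back as naive UTC timestamps:
# re-parsing via astype('str') with utc=True yields a tz-aware index
# that converts cleanly to New York time, and keeps working when the
# result is empty.

import pandas as pd

idx = pd.DatetimeIndex(['2021-01-04 14:30', '2021-01-04 14:31'])  # naive UTC
ny = pd.to_datetime(idx.astype('str'), utc=True).tz_convert('America/New_York')
print(ny)  # 2021-01-04 09:30 / 09:31, tz-aware Eastern

empty = pd.DatetimeIndex([])
print(pd.to_datetime(empty.astype('str'), utc=True)
      .tz_convert('America/New_York'))  # empty, but still tz-aware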
def _fetch_bars_from_api(self, symbols, size, _from=None, to=None,
                         limit=None):
    """
    Query history bars, either minute or day, in parallel for multiple
    symbols. You can pass:
    1. _from + to
    2. to + limit
    3. limit only (the current time is then used as `to`)

    symbols: list[str]
    size:    str ('day', 'minute')
    _from:   str or pd.Timestamp
    to:      str or pd.Timestamp
    limit:   str or int

    return: a MultiIndex dataframe that looks like this:

                    AA                           GOOG
                    open high low close volume   open high low close volume
      DatetimeIndex:

    columns: level 0 - equity name, level 1 - OHLCV
    """
    assert size in ('day', 'minute')
    assert (_from and to) or limit
    if not (_from and to):
        _from, to = self._get_from_and_to(size, limit, end_dt=to)

    if self._use_polygon:
        args = [{
            'symbols': symbol,
            '_from': _from,
            'to': to,
            'size': size,
        } for symbol in symbols]
        result = parallelize(self._fetch_bars_from_api_internal)(args)
        if [df for df in result.values() if isinstance(df, pd.DataFrame)]:
            return pd.concat(result.values(), axis=1)
        else:
            return pd.DataFrame([])
    else:
        # the Alpaca data API supports fetching bars for multiple
        # symbols (< 200) in a single request
        parts = []
        for i in range(0, len(symbols), ALPACA_MAX_SYMBOLS_PER_REQUEST):
            part = symbols[i:i + ALPACA_MAX_SYMBOLS_PER_REQUEST]
            parts.append(part)
        args = [{
            'symbols': part,
            '_from': _from,
            'to': to,
            'size': size,
            'limit': limit,
        } for part in parts]
        result = parallelize_with_multi_process(
            self._fetch_bars_from_api_internal)(args)
        return pd.concat(result, axis=1)
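# Illustration of the MultiIndex layout described in the docstring
# above: per-symbol OHLCV frames concatenated side by side, with the
# symbol as column level 0 and the OHLCV field as level 1. The dummy
# frames below are stand-ins for the per-symbol API results.

import pandas as pd

idx = pd.date_range('2021-01-04', periods=2, freq='D',
                    tz='America/New_York')
cols = ['open', 'high', 'low', 'close', 'volume']
aa = pd.DataFrame(1.0, index=idx, columns=cols)
goog = pd.DataFrame(2.0, index=idx, columns=cols)

bars = pd.concat({'AA': aa, 'GOOG': goog}, axis=1)
print(bars['GOOG']['close'])  # level 0: symbol, level 1: OHLCV field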
def current(self, assets, fields):
    multiple_assets = _is_iterable(assets)
    multiple_fields = _is_iterable(fields)

    asset_list = assets if _is_iterable(assets) else [assets]
    field_list = fields if _is_iterable(fields) else [fields]

    fetch_args = []
    for asset in asset_list:
        for field in field_list:
            fetch_args.append((asset, field))

    if not self._adjust_minutes:
        def fetch(asset, field):
            return self.data_portal.get_spot_value(
                asset,
                field,
                self._get_current_minute(),
                self.data_frequency)
    else:
        def fetch(asset, field):
            return self.data_portal.get_adjusted_value(
                asset,
                field,
                self._get_current_minute(),
                None,  # this used to be self.simulation_dt_func(), but
                       # that is a zipline residue and the value is not
                       # used anyway, so just pass an empty arg
                self.data_frequency)

    results = parallelize(fetch)(fetch_args)

    if not multiple_assets and not multiple_fields:
        # return a scalar value
        return results[(assets, fields)]
    elif multiple_assets and multiple_fields:
        # return a DataFrame indexed on field
        field_results = {field: {} for field in fields}
        for args, result in results.items():
            (asset, field) = args
            field_results[field][asset] = result

        data = {}
        for field in fields:
            series = pd.Series(data=field_results[field],
                               index=assets, name=field)
            data[field] = series

        return pd.DataFrame(data)
    elif multiple_assets:
        # multiple assets, single field:
        # return a Series indexed on assets
        asset_results = {}
        for args, result in results.items():
            (asset, field) = args
            asset_results[asset] = result
        return pd.Series(data=asset_results, index=assets, name=fields)
    else:
        # single asset, multiple fields:
        # return a Series indexed on fields
        field_results = {}
        for args, result in results.items():
            (asset, field) = args
            field_results[field] = result
        return pd.Series(data=field_results, index=fields,
                         name=assets.symbol)
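# Illustration of the four return shapes `current` builds from its
# {(asset, field): value} results dict, using plain ticker strings in
# place of Asset objects and made-up values.

import pandas as pd

assets = ['AAPL', 'MSFT']
fields = ['price', 'volume']
results = {('AAPL', 'price'): 130.0, ('AAPL', 'volume'): 1000,
           ('MSFT', 'price'): 220.0, ('MSFT', 'volume'): 2000}

scalar = results[('AAPL', 'price')]                       # single asset, single field
by_asset = pd.Series({a: results[(a, 'price')] for a in assets},
                     name='price')                        # multiple assets, one field
by_field = pd.Series({f: results[('AAPL', f)] for f in fields},
                     name='AAPL')                         # one asset, multiple fields
frame = pd.DataFrame({f: {a: results[(a, f)] for a in assets}
                      for f in fields})                   # multiple assets and fields
print(frame)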
def _symbol_bars(self, symbols, size, _from=None, to=None, limit=None):
    """
    Query historic_agg_v2 (Polygon) or get_barset (Alpaca), either
    minute or day, in parallel for multiple symbols, and return in dict.

    symbols: list[str]
    size:    str ('day', 'minute')
    _from:   str or pd.Timestamp
    to:      str or pd.Timestamp
    limit:   str or int

    return: dict[str -> pd.DataFrame]
    """
    assert size in ('day', 'minute')

    if not (_from or to):
        to = pd.to_datetime('now', utc=True).tz_convert(
            'America/New_York')

    if not (_from and to) and limit:
        # temp workaround: request twice as many bars, since some of
        # them are dropped when masking to market hours below
        query_limit = limit
        if query_limit is not None:
            query_limit *= 2
        if _from:
            if size == 'day':
                to = _from + timedelta(days=query_limit + 1)
            else:
                to = _from + timedelta(minutes=query_limit + 1)
        else:
            if size == 'day':
                _from = to - timedelta(days=query_limit + 1)
            else:
                _from = to - timedelta(minutes=query_limit + 1)

    @skip_http_error((404, 504))
    def fetch(symbol):
        if self._use_polygon:
            df = self._api.polygon.historic_agg_v2(
                symbol, 1, size,
                int(_from.timestamp()) * 1000,
                int(to.timestamp()) * 1000).df
        else:
            df = self._api.get_barset(symbol, size,
                                      start=_from.date().isoformat(),
                                      end=to.date().isoformat(),
                                      limit=limit).df[symbol]

        # zipline expects right-labeled bars; the API returns
        # left-labeled bars (beginning of bucket), so shift minute bars
        # forward by one minute
        if size == 'minute':
            df.index += pd.Timedelta('1min')

        if not df.empty:
            # mask out bars outside market hours
            mask = self._cal.minutes_in_range(
                df.index[0], df.index[-1],
            ).tz_convert(NY)
            df = df.reindex(mask)

            if limit is not None:
                df = df.iloc[-limit:]

        return df

    return parallelize(fetch)(symbols)
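# Illustration of the `query_limit` workaround above: when only `limit`
# is given, the request window is doubled (plus one bar) so that enough
# bars survive the market-hours mask; only the last `limit` rows are
# kept afterwards. Numbers below are illustrative.

from datetime import timedelta
import pandas as pd

limit = 390                        # roughly one trading day of minute bars
query_limit = limit * 2
to = pd.Timestamp('2021-01-04 16:00', tz='America/New_York')
_from = to - timedelta(minutes=query_limit + 1)
print(_from, '->', to)             # request window sent to the API
# after masking to market minutes: df = df.iloc[-limit:]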