def analyze(perf, filename, doc=None, duration=None, param=None, info=None, show_image=True):
    num_positions = perf.positions.shape[0]
    if num_positions == 0:
        raise ValueError("No positions found")

    gc.collect()
    mem = psutil.virtual_memory()
    log.info("Memory used %.2f Gb of %.2f Gb (%d%%)" %
             (mem.used / 1e9, mem.total / 1e9, mem.percent))

    now = datetime.datetime.now()
    serialise(perf, filename, now)

    with warnings.catch_warnings():
        # ignore the many pyfolio warnings
        warnings.simplefilter("ignore")
        create_report(perf, filename, now, doc, duration, param, info, show_image)
def _pipeline_output(self, pipeline, chunks, name):
    # This method is taken from TradingAlgorithm.
    """
    Internal implementation of `pipeline_output`.

    For live algos we have to use the previous session: the pipeline won't
    work otherwise, because it would try to extrapolate data for
    get_datetime(), which is today.
    """
    today = normalize_date(self.get_datetime())
    prev_session = normalize_date(self.trading_calendar.previous_open(today))

    log.info('Pipeline session in _pipeline_output: {}'.format(prev_session))

    try:
        data = self._pipeline_cache.get(name, prev_session)
    except KeyError:
        # Calculate the next block.
        data, valid_until = self.run_pipeline(
            pipeline, prev_session, next(chunks),
        )
        self._pipeline_cache.set(name, data, valid_until)

    # Now that we have a cached result, try to return the data for the
    # previous session.
    try:
        return data.loc[prev_session]
    except KeyError:
        # This happens if no assets passed the pipeline screen on a given
        # day.
        return pd.DataFrame(index=[], columns=data.columns)
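# Illustrative usage sketch (not from the original source): how pipeline_output
# is typically consumed from an algorithm. The pipeline name 'my_pipeline' and
# the single 'close' column are assumptions made for this example only. In live
# mode, _pipeline_output above serves the frame computed for the previous
# trading session.
from zipline.api import attach_pipeline, pipeline_output
from zipline.pipeline import Pipeline
from zipline.pipeline.data import USEquityPricing


def example_initialize(context):
    # Register a minimal pipeline returning the latest close price.
    pipe = Pipeline(columns={'close': USEquityPricing.close.latest})
    attach_pipeline(pipe, 'my_pipeline')


def example_before_trading_start(context, data):
    # Retrieve the pipeline frame (for live algos: the previous session's frame).
    context.pipeline_data = pipeline_output('my_pipeline')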
def log_order(self, contract, ib_order_id, order):
    if order.orderType == "MKT":
        log.info(
            "Placing order-{order_id}: "
            "{action} {qty} {symbol} with MKT order {tif}.".format(
                order_id=ib_order_id,
                action=order.action,
                qty=order.totalQuantity,
                symbol=contract.symbol,
                tif=order.tif))
    else:
        log.info(
            "Placing order-{order_id}: "
            "{action} {qty} {symbol} with {order_type} order. "
            "limit_price={limit_price} stop_price={stop_price} {tif}".format(
                order_id=ib_order_id,
                action=order.action,
                qty=order.totalQuantity,
                symbol=contract.symbol,
                order_type=order.orderType,
                limit_price=order.lmtPrice,
                stop_price=order.auxPrice,
                tif=order.tif))
def synch_to_calendar(sessions, start_date, end_date, df_ticker, df):
    this_cal = sessions[(sessions >= start_date) & (sessions <= end_date)]
    missing_dates = (len(this_cal) != df_ticker.shape[0])

    if missing_dates:
        sid = df_ticker.index.get_level_values('sid')[0]
        ticker = df_ticker['ticker'][0]
        log.info("Fixing missing interstitial dates for %s (%d)." % (ticker, sid))

        sids = np.full(len(this_cal), sid)
        synch_index = pd.MultiIndex.from_arrays(
            [this_cal.tz_localize(None), sids], names=('date', 'sid'))
        df_ticker_synch = df_ticker.reindex(synch_index)

        # Forward fill missing data; volume and dividends must remain 0
        columns_ffill = ['ticker', 'open', 'high', 'low', 'close']
        df_ticker_synch[columns_ffill] = df_ticker_synch[columns_ffill].fillna(method='ffill')
        df_ticker_synch = df_ticker_synch.fillna({'volume': 0, 'dividends': 0})

        # Drop remaining NaN
        df_ticker_synch.dropna(inplace=True)

        # drop the existing sub dataframe
        df.drop(df_ticker.index, inplace=True)

        # and replace it with the new one with all the dates.
        df.append(df_ticker_synch)
def print_portfolio(log, context):
    mem = psutil.virtual_memory()
    log.info("Memory used %.2f Gb of %.2f Gb (%d%%)" %
             (mem.used / 1e9, mem.total / 1e9, mem.percent))
    pdf = describe_portfolio(context.portfolio.positions)
    log.info('Portfolio Performance:\n{stats}'.format(stats=pdf))
def publish(self, model):
    try:
        log.info("Percent completed: %3.0f%% (%s - %s): %s" %
                 (model.percent_complete,
                  str(model.current_chunk_bounds[0].date()),
                  str(model.current_chunk_bounds[1].date()),
                  model.current_work))
    except Exception:
        log.error("Cannot publish progress state.")
def get_data(sharadar_metadata_df, related_tickers, start=None, end=None):
    df = fetch_data(start, end)

    log.info("Adding SIDs to all stocks...")
    df['sid'] = df['ticker'].apply(
        lambda x: lookup_sid(sharadar_metadata_df, related_tickers, x))

    # unknown sids are -1 instead of nan to preserve the integer type. Drop them.
    unknown_sids = df[df['sid'] == -1]
    df.drop(unknown_sids.index, inplace=True)

    df.set_index(['date', 'sid'], inplace=True)
    df = process_data_table(df)
    return df.sort_index()
def publish(self, model):
    try:
        start = str(model.current_chunk_bounds[0].date())
        end = str(model.current_chunk_bounds[1].date())
        completed = model.percent_complete
        work = model.current_work
        if start == end:
            log.info("Percent completed: %3.0f%% (%s): %s" % (completed, start, work))
        else:
            log.info("Percent completed: %3.0f%% (%s - %s): %s" % (completed, start, end, work))
    except Exception:
        log.error("Cannot publish progress state.")
def start(self):
    log.info("Connecting: {}:{}:{}".format(self._host, self._port, self.client_id))
    self.connect(self._host, self._port, self.client_id)

    # Initialise the threads for various components
    thread = threading.Thread(target=self.run, daemon=True)
    thread.start()
    setattr(self, "_thread", thread)

    timeout = _connection_timeout
    while timeout and not self.isConnected():
        log.info("Cannot connect to TWS. Retrying...")
        sleep(_poll_frequency)
        timeout -= _poll_frequency
    else:
        if not self.isConnected():
            raise SystemError("Connection timeout during TWS connection!")

    self._download_account_details()
    log.info("Managed accounts: {}".format(self.managed_accounts))

    self.reqCurrentTime()
    self.reqIds(1)

    while self.time_skew is None or self._next_order_id is None:
        sleep(_poll_frequency)

    log.info("Local-Broker Time Skew: {}".format(self.time_skew))
def create_tradable_stocks_universe(output_dir, prices_start, prices_end):
    universes_dbpath = os.path.join(output_dir, "universes.sqlite")
    universe_name = TRADABLE_STOCKS_US
    screen = TradableStocksUS()
    universe_start = prices_start.tz_localize('utc')
    universe_end = prices_end.tz_localize('utc')

    universe_last_date = UniverseReader(universes_dbpath).get_last_date(universe_name)
    if not pd.isnull(universe_last_date):
        universe_start = universe_last_date

    log.info("Start creating universe '%s' from %s to %s ..." %
             (universe_name, universe_start, universe_end))
    UniverseWriter(universes_dbpath).write(universe_name, screen, universe_start, universe_end)
def error(self, id_=None, error_code=None, error_msg=None):
    if isinstance(id_, Exception):
        log.exception(id_)

    if isinstance(error_code, int):
        if error_code in (502, 503, 326):
            # 502: Couldn't connect to TWS.
            # 503: The TWS is out of date and must be upgraded.
            # 326: Unable to connect as the client id is already in use.
            self.unrecoverable_error = True

        if error_code < 1000:
            log.error("[{}] {} ({})".format(error_code, error_msg, id_))
        else:
            log.info("[{}] {} ({})".format(error_code, error_msg, id_))
    else:
        log.error("[{}] {} ({})".format(error_code, error_msg, id_))
def process_data_table(df):
    log.info("Adjusting for stock splits...")
    # Data are adjusted for stock splits, but not for dividends.
    m = df['closeunadj'] / df['close']
    # Remove the split factor to get back the unadjusted data
    df['open'] *= m
    df['high'] *= m
    df['low'] *= m
    df['close'] = df['closeunadj']
    df['volume'] /= m
    df['dividends'] *= m

    df = df.drop(['closeunadj', 'lastupdated'], axis=1)
    df = df.replace([np.inf, -np.inf, np.nan], 0)
    df = df.fillna({'volume': 0})
    return df
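# Illustrative sketch (not part of the original source): how the split factor
# m = closeunadj / close removes the split adjustment in process_data_table.
# The sample values below are made up for demonstration.
import pandas as pd

sample = pd.DataFrame({
    'open': [50.0], 'high': [51.0], 'low': [49.0],
    'close': [50.0], 'closeunadj': [100.0],   # e.g. a 2:1 split was applied
    'volume': [1000.0], 'dividends': [0.5],
})
m = sample['closeunadj'] / sample['close']     # m == 2.0
sample['open'] *= m                            # 50 -> 100 (pre-split price)
sample['volume'] /= m                          # 1000 -> 500 (pre-split shares)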
def execDetails(self, req_id, contract, exec_detail):
    order_id, exec_id = exec_detail.orderId, exec_detail.execId
    self.executions[order_id][exec_id] = _method_params_to_dict(vars())
    self._execution_to_order_id[exec_id] = order_id

    log.info("Order-{order_id} executed @ {exec_time}: "
             "{symbol} current: {shares} @ ${price} "
             "total: {cum_qty} @ ${avg_price} "
             "exec_id: {exec_id} by client-{client_id}".format(
                 order_id=order_id, exec_id=exec_id,
                 exec_time=pd.to_datetime(exec_detail.time),
                 symbol=contract.symbol,
                 shares=exec_detail.shares,
                 price=exec_detail.price,
                 cum_qty=exec_detail.cumQty,
                 avg_price=exec_detail.avgPrice,
                 client_id=exec_detail.clientId))
def run_pipeline(self, pipeline, start_date, end_date=None, chunksize=120, hooks=None):
    if end_date is None:
        end_date = start_date
    if hooks is None:
        hooks = [ProgressHooks.with_static_publisher(CliProgressPublisher())]

    if chunksize <= 1:
        log.info("Compute pipeline values without chunks.")
        return super().run_pipeline(pipeline, start_date, end_date, hooks)

    return super().run_chunked_pipeline(pipeline, start_date, end_date, chunksize, hooks)
def initialize(self, *args, **kwargs):
    self._context_persistence_excludes = \
        self._context_persistence_blacklist + \
        [e for e in self.__dict__.keys()
         if e not in self._context_persistence_whitelist]

    if os.path.isfile(self.state_filename):
        log.info("Loading state from {}".format(self.state_filename))
        load_context(self.state_filename, context=self, checksum=self.algo_filename)
        self.initialized = False

    with ZiplineAPI(self):
        super(self.__class__, self).initialize(*args, **kwargs)

    store_context(self.state_filename, context=self,
                  checksum=self.algo_filename,
                  exclude_list=self._context_persistence_excludes)
    self.initialized = True
def __init__(self, *args, **kwargs):
    self.broker = kwargs.pop('broker', None)
    self.orders = {}
    self.algo_filename = kwargs.get('algo_filename', "<algorithm>")
    self.state_filename = kwargs.pop('state_filename', None)

    # The persistence blacklist, whitelist and excludes control which context
    # variables are saved to disk and restored on restart by the serialization
    # functions. The trading client can never be serialized; 'initialized' and
    # the perf tracker remember the context state and the past performance,
    # so they are whitelisted.
    self._context_persistence_blacklist = ['trading_client']
    self._context_persistence_whitelist = ['initialized', 'perf_tracker']
    self._context_persistence_excludes = []

    # blotter is always initialized to SimulationBlotter in run_algo.py;
    # we override it here to use the LiveBlotter for live algos.
    blotter_live = BlotterLive(broker=self.broker)
    kwargs['blotter'] = blotter_live

    super(self.__class__, self).__init__(*args, **kwargs)
    log.info("initialization done")
def __init__(self, broker):
    self.broker = broker
    self._processed_closed_orders = []
    self._processed_transactions = []
    self.new_orders = []
    self.slippage_models = {
        Equity: FixedBasisPointsSlippage(),
        Future: VolatilityVolumeShare(
            volume_limit=DEFAULT_FUTURE_VOLUME_SLIPPAGE_BAR_LIMIT,
        ),
    }
    self.commission_models = {
        Equity: PerShare(),
        Future: PerContract(
            cost=DEFAULT_PER_CONTRACT_COST,
            exchange_fee=FUTURE_EXCHANGE_FEES_BY_SYMBOL,
        ),
    }
    log.info('Initialized blotter_live')
def write(self, universe_name, screen, pipe_start, pipe_end):
    log.info("Computing pipeline from %s to %s..." % (pipe_start, pipe_end))
    stocks = self._execute_pipeline(screen, pipe_end, pipe_start)

    # Create schema, if not exists
    with closing(sqlite3.connect(self.universes_db_path)) as con, con, closing(con.cursor()) as c:
        c.execute("SELECT count(name) FROM sqlite_master WHERE type='table' AND name='%s'" % universe_name)
        if c.fetchone()[0] == 0:
            c.executescript(SCHEMA % (universe_name, universe_name, universe_name))

        log.info("Inserting %d SIDs..." % len(stocks.index))
        with progressbar(stocks.index, show_pos=True) as bar:
            for i in bar:
                c.execute("INSERT OR REPLACE INTO %s VALUES ('%s', %d);" %
                          (universe_name, i[0].date(), i[1].sid))

    log.info("Universe '%s' successfully created/updated" % universe_name)
def run_algorithm(initialize,
                  start=None,
                  end=None,
                  capital_base=1e6,
                  handle_data=None,
                  before_trading_start=None,
                  analyze=None,
                  data_frequency='daily',
                  bundle='sharadar',
                  bundle_timestamp=None,
                  trading_calendar=None,
                  metrics_set='default_daily',
                  benchmark_symbol='SPY',
                  default_extension=True,
                  extensions=(),
                  strict_extensions=True,
                  environ=os.environ,
                  blotter='default',
                  broker=None,
                  state_filename=None):
    """
    Run a trading algorithm.

    Parameters
    ----------
    start : datetime
        The start date of the backtest.
    end : datetime
        The end date of the backtest.
    initialize : callable[context -> None]
        The initialize function to use for the algorithm. This is called once
        at the very beginning of the backtest and should be used to set up
        any state needed by the algorithm.
    capital_base : float
        The starting capital for the backtest.
    handle_data : callable[(context, BarData) -> None], optional
        The handle_data function to use for the algorithm. This is called
        every minute when ``data_frequency == 'minute'`` or every day
        when ``data_frequency == 'daily'``.
    before_trading_start : callable[(context, BarData) -> None], optional
        The before_trading_start function for the algorithm. This is called
        once before each trading day (after initialize on the first day).
    analyze : callable[(context, pd.DataFrame) -> None], optional
        The analyze function to use for the algorithm. This function is
        called once at the end of the backtest and is passed the context and
        the performance data.
    data_frequency : {'daily', 'minute'}, optional
        The data frequency to run the algorithm at. For live trading the
        default is 'minute', otherwise 'daily'.
    bundle : str, optional
        The name of the data bundle to use to load the data to run the
        backtest with. This defaults to 'sharadar'.
    bundle_timestamp : datetime, optional
        The datetime to lookup the bundle data for. This defaults to the
        current time.
    trading_calendar : TradingCalendar, optional
        The trading calendar to use for your backtest.
    metrics_set : iterable[Metric] or str, optional
        The set of metrics to compute in the simulation. If a string is
        passed, resolve the set with :func:`zipline.finance.metrics.load`.
    benchmark_symbol : str, optional
        The symbol of the benchmark. For live trading the default is None,
        otherwise 'SPY'.
    default_extension : bool, optional
        Should the default zipline extension be loaded. This is found at
        ``$ZIPLINE_ROOT/extension.py``
    extensions : iterable[str], optional
        The names of any other extensions to load. Each element may either be
        a dotted module path like ``a.b.c`` or a path to a python file ending
        in ``.py`` like ``a/b/c.py``.
    strict_extensions : bool, optional
        Should the run fail if any extensions fail to load. If this is false,
        a warning will be raised instead.
    environ : mapping[str -> str], optional
        The os environment to use. Many extensions use this to get parameters.
        This defaults to ``os.environ``.
    blotter : str or zipline.finance.blotter.Blotter, optional
        Blotter to use with this algorithm. If passed as a string, we look for
        a blotter construction function registered with
        ``zipline.extensions.register`` and call it with no parameters.
        Default is a :class:`zipline.finance.blotter.SimulationBlotter` that
        never cancels orders.
    broker : instance of zipline.gens.brokers.broker.Broker
    state_filename : path to pickle file storing the algorithm "context"
        (similar to self)

    Returns
    -------
    perf : pd.DataFrame
        The daily performance of the algorithm.

    See Also
    --------
    zipline.data.bundles.bundles : The available data bundles.
    """
    load_extensions(default_extension, extensions, strict_extensions, environ)

    try:
        register_calendar_alias('NYSEMKT', 'XNYS')
        register_calendar_alias('OTC', 'XNYS')
    except CalendarNameCollision as e:
        log.info(e)

    return _run(
        handle_data=handle_data,
        initialize=initialize,
        before_trading_start=before_trading_start,
        analyze=analyze,
        algofile=None,
        algotext=None,
        defines=(),
        data_frequency=data_frequency,
        capital_base=capital_base,
        bundle=bundle,
        bundle_timestamp=bundle_timestamp,
        start=start,
        end=end,
        output=os.devnull,
        trading_calendar=trading_calendar,
        print_algo=False,
        metrics_set=metrics_set,
        local_namespace=False,
        environ=environ,
        blotter=blotter,
        benchmark_symbol=benchmark_symbol,
        broker=broker,
        state_filename=state_filename)
def compute(self, today, assets, out):
    log.info('compute {}'.format(today))
    out[:] = 0
def connectionClosed(self):
    self.unrecoverable_error = True
    log.info("IB Connection closed")
def _run(handle_data, initialize, before_trading_start, analyze, algofile,
         algotext, defines, data_frequency, capital_base, bundle,
         bundle_timestamp, start, end, output, trading_calendar, print_algo,
         metrics_set, local_namespace, environ, blotter, benchmark_symbol,
         broker, state_filename):
    """Run a backtest for the given algorithm.

    This is shared between the cli and :func:`zipline.run_algo`.

    Additions useful for live trading:
    broker - wrapper to connect to a real broker
    state_filename - saving the context of the algo to be able to restart
    """
    log.info("Using bundle '%s'." % bundle)

    if trading_calendar is None:
        trading_calendar = get_calendar('XNYS')

    bundle_data = load_sharadar_bundle(bundle)
    now = pd.Timestamp.utcnow()
    if start is None:
        start = bundle_data.equity_daily_bar_reader.first_trading_day if not broker else now

    if not trading_calendar.is_session(start.date()):
        start = trading_calendar.next_open(start)

    if end is None:
        end = bundle_data.equity_daily_bar_reader.last_available_dt if not broker else start

    # date parameter validation
    if trading_calendar.session_distance(start, end) < 0:
        raise _RunAlgoError(
            'There are no trading days between %s and %s' % (
                start.date(),
                end.date(),
            ),
        )

    if broker:
        log.info("Live Trading on %s." % start.date())
    else:
        log.info("Backtest from %s to %s." % (start.date(), end.date()))

    if benchmark_symbol:
        benchmark = symbol(benchmark_symbol)
        benchmark_sid = benchmark.sid
        benchmark_returns = load_benchmark_data_bundle(
            bundle_data.equity_daily_bar_reader, benchmark)
    else:
        benchmark_sid = None
        benchmark_returns = pd.Series(index=pd.date_range(start, end, tz='utc'), data=0.0)

    # emission_rate is a string representing the smallest frequency at which
    # metrics should be reported. emission_rate will be either minute or daily.
    # When emission_rate is daily, end_of_bar will not be called at all.
    emission_rate = 'daily'

    if algotext is not None:
        if local_namespace:
            # noinspection PyUnresolvedReferences
            ip = get_ipython()  # noqa
            namespace = ip.user_ns
        else:
            namespace = {}

        for assign in defines:
            try:
                name, value = assign.split('=', 2)
            except ValueError:
                raise ValueError(
                    'invalid define %r, should be of the form name=value' % assign,
                )
            try:
                # evaluate in the same namespace so names may refer to
                # each other
                namespace[name] = eval(value, namespace)
            except Exception as e:
                raise ValueError(
                    'failed to execute definition for name %r: %s' % (name, e),
                )
    elif defines:
        raise _RunAlgoError(
            'cannot pass define without `algotext`',
            "cannot pass '-D' / '--define' without '-t' / '--algotext'",
        )
    else:
        namespace = {}
        if algofile is not None:
            algotext = algofile.read()

    if print_algo:
        if PYGMENTS:
            highlight(
                algotext,
                PythonLexer(),
                TerminalFormatter(),
                outfile=sys.stdout,
            )
        else:
            click.echo(algotext)

    first_trading_day = bundle_data.equity_daily_bar_reader.first_trading_day

    if isinstance(metrics_set, six.string_types):
        try:
            metrics_set = metrics.load(metrics_set)
        except ValueError as e:
            raise _RunAlgoError(str(e))

    if isinstance(blotter, six.string_types):
        try:
            blotter = load(Blotter, blotter)
        except ValueError as e:
            raise _RunAlgoError(str(e))

    # Special defaults for live trading
    if broker:
        data_frequency = 'minute'

        # No benchmark
        benchmark_sid = None
        benchmark_returns = pd.Series(index=pd.date_range(start, end, tz='utc'), data=0.0)

        broker.daily_bar_reader = bundle_data.equity_daily_bar_reader

        if start.date() < now.date():
            backtest_start = start
            backtest_end = bundle_data.equity_daily_bar_reader.last_available_dt

            if not os.path.exists(state_filename):
                log.info("Backtest from %s to %s."
                         % (backtest_start.date(), backtest_end.date()))
                backtest_data = DataPortal(
                    bundle_data.asset_finder,
                    trading_calendar=trading_calendar,
                    first_trading_day=first_trading_day,
                    equity_minute_reader=bundle_data.equity_minute_bar_reader,
                    equity_daily_reader=bundle_data.equity_daily_bar_reader,
                    adjustment_reader=bundle_data.adjustment_reader,
                )
                backtest = create_algo_class(
                    TradingAlgorithm, backtest_start, backtest_end, algofile,
                    algotext, analyze, before_trading_start, benchmark_returns,
                    benchmark_sid, blotter, bundle_data, capital_base,
                    backtest_data, 'daily', emission_rate, handle_data,
                    initialize, metrics_set, namespace, trading_calendar)

                ctx_blacklist = ['trading_client']
                ctx_whitelist = ['perf_tracker']
                ctx_excludes = ctx_blacklist + [
                    e for e in backtest.__dict__.keys() if e not in ctx_whitelist
                ]
                backtest.run()
                # TODO better logic for the checksum
                checksum = getattr(algofile, 'name', '<algorithm>')
                store_context(state_filename, context=backtest, checksum=checksum,
                              exclude_list=ctx_excludes)
            else:
                log.warn("State file already exists. Do not run the backtest.")

            # Set start and end to now for live trading
            start = pd.Timestamp.utcnow()
            if not trading_calendar.is_session(start.date()):
                start = trading_calendar.next_open(start)
            end = start

    # TODO: start here to create a pre-run of the algo before live trading;
    # use store_context before switching from TradingAlgorithm to LiveTradingAlgorithm
    TradingAlgorithmClass = (partial(LiveTradingAlgorithm,
                                     broker=broker,
                                     state_filename=state_filename)
                             if broker else TradingAlgorithm)

    DataPortalClass = (partial(DataPortalLive, broker) if broker else DataPortal)
    data = DataPortalClass(
        bundle_data.asset_finder,
        trading_calendar=trading_calendar,
        first_trading_day=first_trading_day,
        equity_minute_reader=bundle_data.equity_minute_bar_reader,
        equity_daily_reader=bundle_data.equity_daily_bar_reader,
        adjustment_reader=bundle_data.adjustment_reader,
    )

    algo = create_algo_class(TradingAlgorithmClass, start, end, algofile,
                             algotext, analyze, before_trading_start,
                             benchmark_returns, benchmark_sid, blotter,
                             bundle_data, capital_base, data, data_frequency,
                             emission_rate, handle_data, initialize,
                             metrics_set, namespace, trading_calendar)

    perf = algo.run()

    if output == '-':
        click.echo(str(perf))
    elif output != os.devnull:  # make the zipline magic not write any data
        perf.to_pickle(output)

    return perf
def bind(self):
    log.info("Connecting: {}:{}:{}".format(self._host, self._port, self.client_id))
    self.connect(self._host, self._port, self.client_id)
def _ingest(start_session, calendar=get_calendar('XNYS'), output_dir=get_output_dir(), sanity_check=True):
    os.makedirs(output_dir, exist_ok=True)

    print("logfiles:", logfilename)
    log.info("Start ingesting SEP, SFP and SF1 data into %s ..." % output_dir)

    end_session = pd.to_datetime(last_available_date())

    # Check valid trading dates, according to the selected exchange calendar
    sessions = calendar.sessions_in_range(start_session, end_session)

    prices_dbpath = os.path.join(output_dir, "prices.sqlite")

    # use string format expected by quandl
    start_fetch_date = sessions[0].strftime('%Y-%m-%d')
    #end_fetch_date = None if sessions[-1].strftime('%Y-%m-%d') == last_trading_date() else sessions[-1].strftime('%Y-%m-%d')

    if os.path.exists(prices_dbpath):
        start_fetch_date = SQLiteDailyBarReader(prices_dbpath).last_available_dt.strftime('%Y-%m-%d')
        log.info("Last available date: %s" % start_fetch_date)

    log.info("Start loading sharadar metadata...")
    related_tickers, sharadar_metadata_df = create_metadata()

    prices_df = get_data(sharadar_metadata_df, related_tickers, start_fetch_date)
    if len(prices_df) > 0:
        # the first price date may differ from start_fetch_date because we query Quandl by lastupdated
        log.info("Price data for %d equities from %s to %s." %
                 (len(prices_df.index.get_level_values(1)), prices_df.index[0][0], prices_df.index[-1][0]))
    else:
        log.info("No price data retrieved for period from %s." % start_fetch_date)

    # iterate over all the securities and pack data and metadata for writing
    tickers = prices_df['ticker'].unique()
    log.info("Start creating data for %d equities..." % (len(tickers)))
    equities_df = create_equities_df(prices_df, tickers, sessions, sharadar_metadata_df, show_progress=True)

    # Additional MACRO data
    macro_equities_df = create_macro_equities_df(calendar)
    equities_df = equities_df.append(macro_equities_df)

    # Write equity metadata
    log.info("Start writing equities...")
    asset_dbpath = os.path.join(output_dir, ("assets-%d.sqlite" % ASSET_DB_VERSION))
    asset_db_writer = SQLiteAssetDBWriter(asset_dbpath)
    asset_db_writer.write(equities=equities_df, exchanges=EXCHANGE_DF)

    # Write PRICING data
    log.info(("Writing pricing data to '%s'..." % (prices_dbpath)))
    sql_daily_bar_writer = SQLiteDailyBarWriter(prices_dbpath, calendar)
    prices_df.sort_index(inplace=True)
    sql_daily_bar_writer.write(prices_df)

    # DIVIDENDS
    log.info("Creating dividends data...")
    dividends_df = create_dividends_df(sharadar_metadata_df, related_tickers, tickers, start_fetch_date)

    # SPLITS
    log.info("Creating splits data...")
    splits_df = create_splits_df(sharadar_metadata_df, related_tickers, tickers, start_fetch_date)

    # mergers?
    # see also https://github.com/quantopian/zipline/blob/master/zipline/data/adjustments.py

    # Write dividends and splits_df
    adjustment_dbpath = os.path.join(output_dir, "adjustments.sqlite")
    sql_daily_bar_reader = SQLiteDailyBarReader(prices_dbpath)
    asset_db_reader = SQLiteAssetFinder(asset_dbpath)
    adjustment_writer = SQLiteDailyAdjustmentWriter(adjustment_dbpath, sql_daily_bar_reader,
                                                    asset_db_reader, sessions)

    log.info("Start writing %d splits and %d dividends data..." % (len(splits_df), len(dividends_df)))
    adjustment_writer.write(splits=splits_df, dividends=dividends_df)

    log.info("Adding macro data from %s ..." % (start_fetch_date))
    macro_prices_df = create_macro_prices_df(start_fetch_date, calendar)
    sql_daily_bar_writer.write(macro_prices_df)

    log.info("Start writing supplementary_mappings data...")
    # EQUITY SUPPLEMENTARY MAPPINGS are used for company name, sector, industry and fundamentals financial data.
    # They could be retrieved by AssetFinder.get_supplementary_field(sid, field_name, as_of_date)
    log.info("Start creating company info dataframe...")
    with closing(sqlite3.connect(asset_dbpath)) as conn, conn, closing(conn.cursor()) as cursor:
        insert_asset_info(sharadar_metadata_df, cursor)

    start_date_fundamentals = asset_db_reader.last_available_fundamentals_dt
    log.info("Start creating Fundamentals dataframe...")
    if must_fetch_entire_table(start_date_fundamentals):
        log.info("Fetch entire table.")
        sf1_df = fetch_entire_table(env["QUANDL_API_KEY"], "SHARADAR/SF1",
                                    parse_dates=['datekey', 'reportperiod'])
    else:
        log.info("Start date: %s" % start_date_fundamentals)
        sf1_df = fetch_sf1_table_date(env["QUANDL_API_KEY"], start_date_fundamentals)
    with closing(sqlite3.connect(asset_dbpath)) as conn, conn, closing(conn.cursor()) as cursor:
        insert_fundamentals(sharadar_metadata_df, sf1_df, cursor, show_progress=True)

    start_date_metrics = asset_db_reader.last_available_daily_metrics_dt
    log.info("Start creating daily metrics dataframe...")
    if must_fetch_entire_table(start_date_metrics):
        log.info("Fetch entire table.")
        daily_df = fetch_entire_table(env["QUANDL_API_KEY"], "SHARADAR/DAILY", parse_dates=['date'])
    else:
        log.info("Start date: %s" % start_date_metrics)
        daily_df = fetch_table_by_date(env["QUANDL_API_KEY"], 'SHARADAR/DAILY', start_date_metrics)
    with closing(sqlite3.connect(asset_dbpath)) as conn, conn, closing(conn.cursor()) as cursor:
        insert_daily_metrics(sharadar_metadata_df, daily_df, cursor, show_progress=True)

    if sanity_check:
        if asset_db_writer.check_sanity():
            log.info("Sanity check successful!")
            okay_path = os.path.join(output_dir, "ok")
            Path(okay_path).touch()

    log.info("Ingest finished!")
def _ingest(calendar, start_session, end_session):
    # use 'latest' (SHARADAR_BUNDLE_DIR) as output dir
    output_dir = get_output_dir()
    os.makedirs(output_dir, exist_ok=True)

    print("logfiles:", logfilename)
    log.info("Start ingesting SEP, SFP and SF1 data into %s ..." % output_dir)

    # Check valid trading dates, according to the selected exchange calendar
    sessions = calendar.sessions_in_range(start_session, end_session)

    prices_dbpath = os.path.join(output_dir, "prices.sqlite")

    start_fetch_date = None
    if os.path.exists(prices_dbpath):
        start_fetch_date = SQLiteDailyBarReader(prices_dbpath).last_available_dt.strftime('%Y-%m-%d')
        log.info("Last available date: %s" % start_fetch_date)

    log.info("Start loading sharadar metadata...")
    sharadar_metadata_df = quandl.get_table('SHARADAR/TICKERS', table=['SFP', 'SEP'], paginate=True)
    sharadar_metadata_df.set_index('ticker', inplace=True)
    related_tickers = sharadar_metadata_df['relatedtickers'].dropna()
    # Add a space at the beginning and end of relatedtickers, search for ' TICKER '
    related_tickers = ' ' + related_tickers.astype(str) + ' '

    prices_df = get_data(sharadar_metadata_df, related_tickers, start=start_fetch_date)

    # iterate over all the securities and pack data and metadata for writing
    tickers = prices_df['ticker'].unique()
    log.info("Start writing price data for %d equities." % (len(tickers)))
    equities_df = create_equities_df(prices_df, tickers, sessions, sharadar_metadata_df, show_progress=True)

    # Write PRICING data
    log.info(("Writing pricing data to '%s'..." % (prices_dbpath)))
    sql_daily_bar_writer = SQLiteDailyBarWriter(prices_dbpath, calendar)
    prices_df.sort_index(inplace=True)
    sql_daily_bar_writer.write(prices_df)

    # DIVIDENDS
    log.info("Creating dividends data...")
    # see also https://github.com/shlomikushchi/zipline-live2/blob/master/zipline/data/bundles/csvdir.py
    dividends_df = create_dividends_df(prices_df, sharadar_metadata_df)

    # SPLITS
    log.info("Creating splits data...")
    splits_df = create_splits_df(sharadar_metadata_df, related_tickers, tickers, start_fetch_date)

    # TODO mergers?
    # see also https://github.com/quantopian/zipline/blob/master/zipline/data/adjustments.py

    # Write dividends and splits_df
    sql_daily_bar_reader = SQLiteDailyBarReader(prices_dbpath)
    adjustment_dbpath = os.path.join(output_dir, "adjustments.sqlite")
    adjustment_writer = SQLiteDailyAdjustmentWriter(adjustment_dbpath, sql_daily_bar_reader, sessions)

    log.info("Start writing %d splits and %d dividends data..." % (len(splits_df), len(dividends_df)))
    adjustment_writer.write(splits=splits_df, dividends=dividends_df)

    # Additional MACRO data
    prices_start = prices_df.index[0][0]
    prices_end = prices_df.index[-1][0]
    macro_equities_df = create_macro_equities_df(prices_end)
    equities_df = equities_df.append(macro_equities_df)

    # Write equity metadata
    log.info("Start writing equities and supplementary_mappings data...")
    asset_dbpath = os.path.join(output_dir, ("assets-%d.sqlite" % ASSET_DB_VERSION))
    asset_db_writer = SQLiteAssetDBWriter(asset_dbpath)
    asset_db_writer.write(equities=equities_df, exchanges=EXCHANGE_DF)

    # EQUITY SUPPLEMENTARY MAPPINGS are used for company name, sector, industry and fundamentals financial data.
    # They could be retrieved by AssetFinder.get_supplementary_field(sid, field_name, as_of_date)
    log.info("Start creating company info dataframe...")
    with closing(sqlite3.connect(asset_dbpath)) as conn, conn, closing(conn.cursor()) as cursor:
        insert_equity_extra_data_basic(sharadar_metadata_df, cursor)

    log.info("Start creating Fundamentals dataframe...")
    if start_fetch_date is None:
        sf1_df = fetch_entire_table(env["QUANDL_API_KEY"], "SHARADAR/SF1",
                                    parse_dates=['datekey', 'reportperiod'])
        # TODO filter out dimensions other than 'ARQ' and 'ART'
    else:
        sf1_df = fetch_sf1_table_date(env["QUANDL_API_KEY"], start_fetch_date)
    with closing(sqlite3.connect(asset_dbpath)) as conn, conn, closing(conn.cursor()) as cursor:
        insert_equity_extra_data_sf1(sharadar_metadata_df, sf1_df, cursor, show_progress=True)

    log.info("Adding macro data from %s to %s ..." % (prices_start, prices_end))
    macro_prices_df = create_macro_prices_df(prices_start, prices_end, calendar)
    sql_daily_bar_writer.write(macro_prices_df)

    # Predefined Named Universes
    from sharadar.pipeline.universes import create_tradable_stocks_universe
    create_tradable_stocks_universe(output_dir, prices_start, prices_end)

    sane = asset_db_writer.check_sanity()
    if sane:
        log.info("Sanity check successful!")
        okay_path = os.path.join(output_dir, "ok")
        Path(okay_path).touch()

    log.info("Ingest finished!")
def serialise(perf, filename, now):
    suffix = '_' + now.strftime(DATETIME_FMT) + '_perf.dump'
    perf_dump_file = change_extension(filename, suffix)
    log.info("Serialise performance data in %s" % perf_dump_file)
    # joblib.dump(perf, perf_dump_file)
    perf.to_pickle(perf_dump_file)
    inputs = []
    window_length = 1

    def __new__(self, universe_name):
        self.universe_name = universe_name

        universes_db_path = os.path.join(get_output_dir(), "universes.sqlite")
        self.universe_reader = UniverseReader(universes_db_path)
        return super(NamedUniverse, self).__new__(self)

    def compute(self, today, assets, out):
        sids = self.universe_reader.get_sid(self.universe_name, today.date())
        out[:] = assets.isin(sids)


if __name__ == "__main__":
    universe_start = pd.to_datetime('1998-10-16', utc=True)
    universe_end = pd.to_datetime('2020-12-30', utc=True)

    from sharadar.util.output_dir import get_output_dir
    universes_dbpath = os.path.join(get_output_dir(), "universes.sqlite")
    universe_name = TRADABLE_STOCKS_US
    screen = TradableStocksUS()

    universe_last_date = UniverseReader(universes_dbpath).get_last_date(universe_name)
    if not pd.isnull(universe_last_date):
        universe_start = universe_last_date

    log.info("Start creating universe '%s' from %s to %s ..." %
             (universe_name, universe_start, universe_end))
    UniverseWriter(universes_dbpath).write(universe_name, screen, universe_start, universe_end)
def on_exit(self):
    self.teardown()
    self.broker.disconnect()
    log.info("Today's trading ended. The algo needs to be restarted daily.")