def get_spot_value(self, assets, field, dt, data_frequency):
    """Return the broker's spot value, or a missing-value marker on failure.

    The call is delegated to ``self.broker.get_spot_value`` with the same
    arguments. If the broker raises, a warning is logged and a placeholder
    is returned instead of propagating the error.

    Parameters
    ----------
    assets, field, dt, data_frequency
        Forwarded unchanged to ``self.broker.get_spot_value``. ``dt`` must
        expose a ``date()`` method because it is used in the warning message.

    Returns
    -------
    The broker's value on success. On failure ``pd.NaT`` when ``field`` is
    ``'last_traded'``, otherwise ``np.nan``.
    """
    try:
        return self.broker.get_spot_value(assets, field, dt, data_frequency)
    except Exception:
        # Deliberate best-effort lookup: a failing broker call must not stop
        # the caller. ``Exception`` (not a bare ``except``) is caught so that
        # KeyboardInterrupt / SystemExit still propagate.
        log.warn("Cannot get %s value for %s at %s" % (field, assets, dt.date()))
        # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was
        # removed in NumPy 2.0.
        return pd.NaT if field == 'last_traded' else np.nan
def get_daily_metrics(asset_finder, assets, field, today, n, mult=1):
    """Fetch a daily metric scaled by ``mult``, with a previous-day fallback.

    ``asset_finder.get_daily_metrics(assets, field, today, n)`` is queried
    and multiplied by ``mult``. When every value of that result is NaN, a
    warning is logged, the query is repeated with ``n + 1`` and only the
    first row (``[0, :]``) of that second result, scaled by ``mult``, is
    returned.
    """
    scaled = mult * asset_finder.get_daily_metrics(assets, field, today, n)
    if not np.isnan(scaled).all():
        return scaled

    # All NaN: the value was not ingested (delay in computation in SEP),
    # so fall back to the data of the previous day.
    log.warn("No data for %s on %s. Use data from previous day." % (field, today.date()))
    extended = asset_finder.get_daily_metrics(assets, field, today, n + 1)
    return mult * extended[0, :]
def get_fundamentals(self, sids, field_name, as_of_date=None, n=1):
    """Return the values of ``field_name`` for ``sids`` as a float64 array.

    ``n=1`` is the most recent quarter or last ttm, ``n=2`` the previous
    quarter or ttm, and so on. This differs from the original zipline
    ``window_length``.

    The rows come from ``self._get_result(..., enforce_date=True)``. Column 0
    of those rows is used as the index, the frame is re-indexed to ``sids``
    (sids without a row become NaN) and transposed, so there is one column
    per requested sid. When no rows are found a warning is logged and an
    empty list is returned.
    """
    rows = self._get_result(sids, field_name, as_of_date, n, enforce_date=True)

    if len(rows) > 0:
        frame = pd.DataFrame(rows)
        by_sid = frame.set_index(0).reindex(sids)
        # shape: (window length = 1, number of assets)
        return by_sid.T.values.astype('float64')

    log.warn(
        "No result: asset_finder().get_fundamentals(%s, %s, %s, n=%s)"
        % (sids, field_name, as_of_date, n))
    return []
def _run(handle_data,
         initialize,
         before_trading_start,
         analyze,
         algofile,
         algotext,
         defines,
         data_frequency,
         capital_base,
         bundle,
         bundle_timestamp,
         start,
         end,
         output,
         trading_calendar,
         print_algo,
         metrics_set,
         local_namespace,
         environ,
         blotter,
         benchmark_symbol,
         broker,
         state_filename):
    """Run a backtest (or a live-trading session) for the given algorithm.

    This is shared between the cli and :func:`zipline.run_algo`.

    Additions useful for live trading:

    broker
        Wrapper to connect to a real broker. When truthy the run is a live
        session: ``data_frequency`` is forced to ``'minute'``, the benchmark
        is replaced by a zero-return series and ``LiveTradingAlgorithm`` /
        ``DataPortalLive`` are used instead of ``TradingAlgorithm`` /
        ``DataPortal``.
    state_filename
        File used to save the context of the algo so it can be restarted.
        In live mode, when ``start`` lies before today and this file does
        not exist yet, a daily backtest from ``start`` to the last available
        bundle date is run first and its context is stored there.

    Other parameters
    ----------------
    start, end
        When ``None`` they default to the bundle's first trading day / last
        available date (backtest) or to the current UTC time (live). A
        ``start`` that is not a session is moved to the next open.
    trading_calendar
        Defaults to the ``'XNYS'`` calendar when ``None``.
    algofile, algotext, defines, local_namespace, print_algo
        Source of the algorithm and ``name=value`` definitions evaluated into
        its namespace. ``defines`` is only allowed together with ``algotext``.
    metrics_set, blotter
        Either ready objects or string names that are loaded here.
    output
        ``'-'`` echoes the result, ``os.devnull`` writes nothing, any other
        value is used as the pickle path for the result.
    bundle_timestamp, environ
        Accepted for interface compatibility; not used in this function body.

    The remaining arguments are forwarded to ``create_algo_class``.

    Returns
    -------
    The object returned by ``algo.run()``.

    Raises
    ------
    _RunAlgoError
        If ``end`` precedes ``start``, if ``defines`` is given without
        ``algotext``, or if ``metrics_set`` / ``blotter`` cannot be loaded.
    ValueError
        If a define is malformed or its value cannot be evaluated.
    """
    log.info("Using bundle '%s'." % bundle)

    if trading_calendar is None:
        trading_calendar = get_calendar('XNYS')

    bundle_data = load_sharadar_bundle(bundle)
    now = pd.Timestamp.utcnow()
    if start is None:
        start = bundle_data.equity_daily_bar_reader.first_trading_day if not broker else now

    if not trading_calendar.is_session(start.date()):
        start = trading_calendar.next_open(start)

    if end is None:
        end = bundle_data.equity_daily_bar_reader.last_available_dt if not broker else start

    # date parameter validation
    if trading_calendar.session_distance(start, end) < 0:
        raise _RunAlgoError(
            'There are no trading days between %s and %s' % (
                start.date(),
                end.date(),
            ),
        )

    if broker:
        log.info("Live Trading on %s." % start.date())
    else:
        log.info("Backtest from %s to %s." % (start.date(), end.date()))

    if benchmark_symbol:
        benchmark = symbol(benchmark_symbol)
        benchmark_sid = benchmark.sid
        benchmark_returns = load_benchmark_data_bundle(
            bundle_data.equity_daily_bar_reader, benchmark)
    else:
        benchmark_sid = None
        benchmark_returns = pd.Series(index=pd.date_range(start, end, tz='utc'), data=0.0)

    # emission_rate is a string representing the smallest frequency at which
    # metrics should be reported. emission_rate will be either minute or
    # daily. When emission_rate is daily, end_of_bar will not be called at all.
    emission_rate = 'daily'

    if algotext is not None:
        if local_namespace:
            # noinspection PyUnresolvedReferences
            ip = get_ipython()  # noqa
            namespace = ip.user_ns
        else:
            namespace = {}

        for assign in defines:
            try:
                # Split on the first '=' only, so that a value which itself
                # contains '=' (e.g. "flag=a==b") is still a valid define.
                name, value = assign.split('=', 1)
            except ValueError:
                raise ValueError(
                    'invalid define %r, should be of the form name=value' %
                    assign,
                )
            try:
                # evaluate in the same namespace so names may refer to
                # each other
                # NOTE(review): eval() executes arbitrary code taken from the
                # defines; this is only safe while defines come from the
                # trusted operator running the command.
                namespace[name] = eval(value, namespace)
            except Exception as e:
                raise ValueError(
                    'failed to execute definition for name %r: %s' % (name, e),
                )
    elif defines:
        raise _RunAlgoError(
            'cannot pass define without `algotext`',
            "cannot pass '-D' / '--define' without '-t' / '--algotext'",
        )
    else:
        namespace = {}
        if algofile is not None:
            algotext = algofile.read()

    if print_algo:
        if PYGMENTS:
            highlight(
                algotext,
                PythonLexer(),
                TerminalFormatter(),
                outfile=sys.stdout,
            )
        else:
            click.echo(algotext)

    first_trading_day = \
        bundle_data.equity_daily_bar_reader.first_trading_day

    if isinstance(metrics_set, six.string_types):
        try:
            metrics_set = metrics.load(metrics_set)
        except ValueError as e:
            raise _RunAlgoError(str(e))

    if isinstance(blotter, six.string_types):
        try:
            blotter = load(Blotter, blotter)
        except ValueError as e:
            raise _RunAlgoError(str(e))

    # Special defaults for live trading
    if broker:
        data_frequency = 'minute'

        # No benchmark
        benchmark_sid = None
        benchmark_returns = pd.Series(index=pd.date_range(start, end, tz='utc'), data=0.0)

        broker.daily_bar_reader = bundle_data.equity_daily_bar_reader

        if start.date() < now.date():
            # The requested start is in the past: warm up with a daily
            # backtest up to the last date available in the bundle.
            backtest_start = start
            backtest_end = bundle_data.equity_daily_bar_reader.last_available_dt

            if not os.path.exists(state_filename):
                log.info("Backtest from %s to %s." % (backtest_start.date(), backtest_end.date()))
                backtest_data = DataPortal(
                    bundle_data.asset_finder,
                    trading_calendar=trading_calendar,
                    first_trading_day=first_trading_day,
                    equity_minute_reader=bundle_data.equity_minute_bar_reader,
                    equity_daily_reader=bundle_data.equity_daily_bar_reader,
                    adjustment_reader=bundle_data.adjustment_reader,
                )
                backtest = create_algo_class(
                    TradingAlgorithm, backtest_start, backtest_end, algofile,
                    algotext, analyze, before_trading_start,
                    benchmark_returns, benchmark_sid, blotter, bundle_data,
                    capital_base, backtest_data, 'daily', emission_rate,
                    handle_data, initialize, metrics_set, namespace,
                    trading_calendar)

                # Everything on the algo object except the whitelisted
                # attributes is excluded from the stored context. The list is
                # computed before run() is called, as in the original flow.
                ctx_blacklist = ['trading_client']
                ctx_whitelist = ['perf_tracker']
                ctx_excludes = ctx_blacklist + [
                    e for e in backtest.__dict__ if e not in ctx_whitelist
                ]
                backtest.run()
                # TODO: better logic for the checksum
                checksum = getattr(algofile, 'name', '<algorithm>')
                store_context(state_filename, context=backtest,
                              checksum=checksum, exclude_list=ctx_excludes)
            else:
                log.warn("State file already exists. Do not run the backtest.")

            # Set start and end to now for live trading
            start = pd.Timestamp.utcnow()
            if not trading_calendar.is_session(start.date()):
                start = trading_calendar.next_open(start)
            end = start

    # TODO: start here to create a pre-run of the algo before live trading;
    # use store_context before switching from TradingAlgorithm to
    # LiveTradingAlgorithm.
    TradingAlgorithmClass = (partial(
        LiveTradingAlgorithm, broker=broker, state_filename=state_filename)
        if broker else TradingAlgorithm)

    DataPortalClass = (partial(DataPortalLive, broker)
                       if broker else DataPortal)

    data = DataPortalClass(
        bundle_data.asset_finder,
        trading_calendar=trading_calendar,
        first_trading_day=first_trading_day,
        equity_minute_reader=bundle_data.equity_minute_bar_reader,
        equity_daily_reader=bundle_data.equity_daily_bar_reader,
        adjustment_reader=bundle_data.adjustment_reader,
    )

    algo = create_algo_class(
        TradingAlgorithmClass, start, end, algofile, algotext, analyze,
        before_trading_start, benchmark_returns, benchmark_sid, blotter,
        bundle_data, capital_base, data, data_frequency, emission_rate,
        handle_data, initialize, metrics_set, namespace, trading_calendar)

    perf = algo.run()

    if output == '-':
        click.echo(str(perf))
    elif output != os.devnull:  # make the zipline magic not write any data
        perf.to_pickle(output)

    return perf
def calc_dividend_ratios(self, dividends):
    """
    Calculate the ratios to apply to equities when looking back at pricing
    history so that the price is smoothed over the ex_date, when the market
    adjusts to the change in equity value due to upcoming dividend.

    Dividends whose ratio cannot be computed (NaN) or whose ratio is not
    strictly positive are logged and left out of the result.

    Parameters
    ----------
    dividends : DataFrame or None
        Frame with at least the columns ``sid``, ``ex_date`` and ``amount``.

    Returns
    -------
    DataFrame
        A frame in the same format as splits and mergers, with keys
        - sid, the id of the equity
        - effective_date, the date in seconds on which to apply the ratio.
        - ratio, the ratio to apply to backwards looking pricing data.
    """
    if dividends is None or dividends.empty:
        return pd.DataFrame(np.array(
            [],
            dtype=[
                ('sid', uint64_dtype),
                ('effective_date', uint32_dtype),
                ('ratio', float64_dtype),
            ],
        ))

    pricing_reader = self._equity_daily_bar_reader
    input_sids = dividends.sid.values
    # sids_ix maps every dividend row to the column of its sid in `close`.
    unique_sids, sids_ix = np.unique(input_sids, return_inverse=True)
    dates = pricing_reader.sessions.values

    start = pd.Timestamp(dates[0], tz='UTC')
    end = pd.Timestamp(dates[-1], tz='UTC')
    calendar = pricing_reader.trading_calendar
    data_portal = DataPortal(self._asset_finder,
                             trading_calendar=calendar,
                             first_trading_day=start,
                             equity_daily_reader=pricing_reader,
                             adjustment_reader=SQLiteAdjustmentReader(self._filename))

    # NOTE(review): the indexing below assumes `close` has one row per entry
    # of `dates` and one column per entry of `unique_sids` - confirm against
    # DataPortal.get_history_window.
    close = data_portal.get_history_window(
        assets=unique_sids,
        end_dt=end,
        bar_count=calendar.session_distance(start, end),
        frequency='1d',
        field='close',
        data_frequency='daily').values

    date_ix = np.searchsorted(dates, dividends.ex_date.values)
    # An index of 0 has no prior row to read a previous close from, so those
    # dividends are dropped.
    mask = date_ix > 0

    date_ix = date_ix[mask]
    sids_ix = sids_ix[mask]
    input_dates = dividends.ex_date.values[mask]

    # subtract one day to get the close on the day prior to the dividend
    previous_close = close[date_ix - 1, sids_ix]
    input_sids = input_sids[mask]

    amount = dividends.amount.values[mask]
    ratio = 1.0 - amount / previous_close

    non_nan_ratio_mask = ~np.isnan(ratio)
    for ix in np.flatnonzero(~non_nan_ratio_mask):
        ex_date = pd.Timestamp(input_dates[ix], tz='UTC')
        start_date = self._asset_finder.retrieve_asset(input_sids[ix]).start_date
        # No warning when the ex-date equals the asset's start date.
        if ex_date != start_date:
            log.warn(
                "Couldn't compute ratio for dividend"
                " sid={sid}, ex_date={ex_date:%Y-%m-%d}, start_date={start_date:%Y-%m-%d}, amount={amount:.3f}",
                sid=input_sids[ix],
                ex_date=ex_date,
                amount=amount[ix],
                start_date=start_date
            )

    # A ratio <= 0 would flip or zero out historical prices. Test the ratio
    # itself (NaN compares False here and was already reported above) so the
    # warning matches the rows it is logged for and those rows are excluded.
    positive_ratio_mask = ratio > 0
    for ix in np.flatnonzero(~positive_ratio_mask & non_nan_ratio_mask):
        log.warn(
            "Dividend ratio <= 0 for dividend"
            " sid={sid}, ex_date={ex_date:%Y-%m-%d}, amount={amount:.3f}",
            sid=input_sids[ix],
            ex_date=pd.Timestamp(input_dates[ix]),
            amount=amount[ix],
        )

    valid_ratio_mask = non_nan_ratio_mask & positive_ratio_mask
    return pd.DataFrame({
        'sid': input_sids[valid_ratio_mask],
        'effective_date': input_dates[valid_ratio_mask],
        'ratio': ratio[valid_ratio_mask],
    })
def _build_figure_or_none(builder):
    """Call ``builder()``; on any failure log it and return ``None``."""
    try:
        return builder()
    except Exception as e:
        # A chart that cannot be rendered must not prevent the report from
        # being written.
        log.warn(e)
        return None


def create_report(perf, filename, now, doc=None, duration=None, param=None, info=None, show_image=True):
    """Write an HTML performance report for a finished run.

    The report file name is derived from ``filename`` through
    ``change_extension`` with a suffix made of the formatted ``now`` and the
    annual return. It contains the performance statistics of the run next to
    those of SPY, the worst drawdowns, stress events, round-trip statistics
    (only when there are at least 5 round trips), optional charts and the
    source text read from ``filename``.

    Parameters
    ----------
    perf : DataFrame
        Result of the run. When it has no ``returns`` attribute, one is
        derived from ``pnl`` and ``portfolio_value``.
    filename : str
        Path of the algorithm file; used for the title, for the report name
        and as the source listing embedded in the report.
    now : datetime-like
        Creation time; must support ``strftime``.
    doc, param, info : optional
        Free text / objects shown in the "Description", "Parameters" and
        "Info" sections. They are HTML-escaped before being written.
    duration : float, optional
        Run time in seconds.
    show_image : bool
        When false no chart is created or embedded.

    Returns
    -------
    None. When ``perf`` holds no positions a warning is logged and no report
    is written.
    """
    # Function-scope import: needed only to escape text embedded in the page.
    import html

    if not hasattr(perf, 'returns'):
        perf['returns'] = perf['pnl'] / (perf['portfolio_value'] - perf['pnl'])
        # Replace NaN and +/-inf in the derived returns with 0.0.
        perf['returns'] = perf['returns'].replace([np.nan, np.inf, -np.inf], 0.0)

    tot_positions = sum(len(x) for x in perf.positions)
    if tot_positions == 0:
        log.warn("No positions available")
        return

    rets, positions, transactions = pf.utils.extract_rets_pos_txn_from_zipline(perf)
    date_rows = OrderedDict()
    if len(rets.index) > 0:
        date_rows['Start date'] = rets.index[0].strftime('%Y-%m-%d')
        date_rows['End date'] = rets.index[-1].strftime('%Y-%m-%d')
        date_rows['Total months'] = int(len(rets) / 21)

    perf_stats_series = pf.timeseries.perf_stats(rets, positions=positions, transactions=transactions)

    benchmark_rets = returns(symbol('SPY'), rets.index[0], rets.index[-1])
    benchmark_perf_stats = pf.timeseries.perf_stats(benchmark_rets)

    perf_stats_df = pd.DataFrame(perf_stats_series, columns=['Backtest'])
    perf_stats_df['Benchmark'] = benchmark_perf_stats
    perf_stats_df['Spread'] = perf_stats_df['Backtest'] - perf_stats_df['Benchmark']
    format_perf_stats(perf_stats_df)

    drawdown_df = pf.timeseries.gen_drawdown_table(rets, top=5)
    rets_interesting = pf.timeseries.extract_interesting_date_ranges(rets)
    positions = utils.check_intraday('infer', rets, positions, transactions)
    transactions_closed = rt.add_closing_transactions(positions, transactions)
    trades = rt.extract_round_trips(
        transactions_closed,
        portfolio_value=positions.sum(axis='columns') / (1 + rets))

    # Every chart is optional: a failed or disabled chart stays None and is
    # simply left out of the page.
    fig0 = fig1 = fig2 = fig3 = fig4 = fig5 = None
    if show_image:
        fig0 = _build_figure_or_none(
            lambda: create_log_returns_chart(rets, benchmark_rets))
        fig1 = _build_figure_or_none(
            lambda: pf.create_returns_tear_sheet(rets, positions, transactions,
                                                 benchmark_rets=benchmark_rets,
                                                 return_fig=True))
        fig2 = _build_figure_or_none(
            lambda: pf.create_position_tear_sheet(rets, positions, return_fig=True))
        fig3 = _build_figure_or_none(
            lambda: pf.create_txn_tear_sheet(rets, positions, transactions, return_fig=True))
        fig4 = _build_figure_or_none(
            lambda: pf.create_interesting_times_tear_sheet(rets, return_fig=True))
        fig5 = _build_figure_or_none(
            lambda: pf.create_round_trip_tear_sheet(rets, positions, transactions, return_fig=True))

    report_suffix = "_%s_%.2f_report.htm" % (
        now.strftime(DATETIME_FMT), 100. * perf_stats_series['Annual return'])
    reportfile = change_extension(filename, report_suffix)

    # Read the algorithm source up front so its handle is closed right away.
    with open(filename, "r") as source_file:
        source_text = source_file.read()

    with open(reportfile, 'w') as f:
        print('<!DOCTYPE html> <html> <head> <title>Performance Report</title> '
              '<style > '
              'body { font-family: Arial, Helvetica, sans-serif; } '
              'table { border-collapse: collapse; } '
              'tbody tr:nth-child(odd) { background-color: lightgrey; } '
              'tbody tr:nth-child(even) { background-color: white; } '
              'tr th { border: none; text-align: right; padding: 2px 5px 2px; } '
              'tr td { border: none; text-align: right; padding: 2px 5px 2px; } '
              '</style> '
              '<script type="text/javascript"> '
              "function showElement() { element = document.getElementById('code'); "
              "element.style.visibility = 'visible'; } "
              "function hideElement() { element = document.getElementById('code'); "
              "element.style.visibility = 'hidden'; } "
              '</script> </head> <body>', file=f)
        print("<h1>Performance report for " + os.path.basename(filename) + "</h1>", file=f)
        print("<p>Created on %s</p>" % (now), file=f)

        if duration is not None:
            print("<p>Backtest executed in %s</p>" % (time.strftime("%H:%M:%S", time.gmtime(duration))), file=f)

        # Free text is escaped so that '<', '>' and '&' show up literally
        # instead of being parsed as markup by the browser.
        if doc is not None:
            print('<h3>Description</h3>', file=f)
            print('<p style="white-space: pre">%s</p>' % html.escape(doc.strip()), file=f)

        if param is not None and len(param) > 0:
            print('<h3>Parameters</h3>', file=f)
            print('<pre>%s</pre><br/>' % html.escape(str(param)), file=f)

        if info is not None and len(info) > 0:
            print('<h3>Info</h3>', file=f)
            print('<pre>%s</pre><br/>' % html.escape(str(info)), file=f)

        print(to_html_table(perf_stats_df, float_format='{0:.2f}'.format, header_rows=date_rows), file=f)
        print("<br/>", file=f)

        if fig0 is not None:
            print("<h3>Log Returns</h3>", file=f)
            print(_to_img(fig0), file=f)
            print("<br/>", file=f)

        print(to_html_table(
            drawdown_df.sort_values('Net drawdown in %', ascending=False),
            name='Worst drawdown periods',
            float_format='{0:.2f}'.format,
        ), file=f)
        print("<br/>", file=f)

        print(to_html_table(pd.DataFrame(rets_interesting).describe().
                            transpose().loc[:, ['mean', 'min', 'max']] * 100,
                            name='Stress Events',
                            float_format='{0:.2f}%'.format), file=f)
        print("<br/>", file=f)

        if len(trades) >= 5:
            stats = rt.gen_round_trip_stats(trades)
            print(to_html_table(stats['summary'], float_format='{:.2f}'.format, name='Summary stats'), file=f)
            print("<br/>", file=f)
            print(to_html_table(stats['pnl'], float_format='${:.2f}'.format, name='PnL stats'), file=f)
            print("<br/>", file=f)
            print(to_html_table(stats['duration'], float_format='{:.2f}'.format, name='Duration stats'), file=f)
            print("<br/>", file=f)
            print(to_html_table(stats['returns'] * 100, float_format='{:.2f}%'.format, name='Return stats'), file=f)
            print("<br/>", file=f)
            stats['symbols'].columns = stats['symbols'].columns.map(format_asset)
            print(to_html_table(stats['symbols'] * 100, float_format='{:.2f}%'.format, name='Symbol stats'), file=f)

        for title, fig in (("Returns", fig1),
                           ("Positions", fig2),
                           ("Transactions", fig3),
                           ("Interesting Times", fig4),
                           ("Trades", fig5)):
            if fig is not None:
                print("<h3>%s</h3>" % title, file=f)
                print(_to_img(fig), file=f)

        print('<br/>', file=f)
        print(
            '<button onclick="showElement()">Show Code</button> <button onclick="hideElement()">Hide Code</button>',
            file=f)
        print('<pre id="code" style="visibility: hidden">', file=f)
        # Python source routinely contains '<' and '&'; escape it so the
        # listing is displayed verbatim.
        print(html.escape(source_text), file=f)
        print('</pre>', file=f)

        print("</body>\n</html>", file=f)