def test_data_offset():
    """Check that user-specified millisecond offsets are applied to market and trade data.

    Offsetting is useful when the clocks used to record market and trade data are
    slightly out of sync with each other.
    """
    Mediator.get_volatile_cache().clear_cache()

    engine = TCAEngineImpl(version=tcapy_version)

    request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                         trade_data_store=trade_data_store, market_data_store=market_data_store,
                         trade_order_mapping=trade_order_mapping)

    baseline = engine.calculate_tca(tca_request=request)

    # Re-run with trade data shifted forward 1ms and market data shifted back 1ms
    request.trade_data_offset_ms = 1
    request.market_data_offset_ms = -1

    shifted = engine.calculate_tca(tca_request=request)

    base_trade_df = baseline[trade_df_name]
    base_market_df = baseline['market_df']
    shifted_trade_df = shifted[trade_df_name]
    shifted_market_df = shifted['market_df']

    # Every market timestamp should have moved back by exactly 1ms...
    assert all(base_market_df.index + timedelta(milliseconds=-1) == shifted_market_df.index)

    # ... and every trade timestamp forward by exactly 1ms
    assert all(base_trade_df.index + timedelta(milliseconds=1) == shifted_trade_df.index)

    # Any additional date columns on the trades should be shifted by the same amount
    for col in constants.date_columns:
        if col in base_trade_df.columns:
            assert all(base_trade_df[col] + timedelta(milliseconds=1) == shifted_trade_df[col])
def test_full_detailed_tca_calculation():
    """Run a detailed TCA calculation and verify the expected output tables are
    returned, then check that a ticker with no data raises DataMissingException.
    """
    engine = TCAEngineImpl(version=tcapy_version)

    request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                         trade_data_store=trade_data_store, market_data_store=market_data_store,
                         trade_order_mapping=trade_order_mapping)

    results = engine.calculate_tca(tca_request=request)

    # Trade table, sparse market/trade table and full market table must all be present
    for key in (trade_df_name, 'sparse_market_' + trade_df_name, 'market_df'):
        assert key in results

    # A ticker with no underlying data should surface as a DataMissingException
    request.ticker = missing_ticker

    raised = False

    try:
        engine.calculate_tca(tca_request=request)
    except DataMissingException:
        raised = True

    assert raised
def test_full_detailed_tca_calculation(fill_market_trade_databases):
    """Run a detailed TCA calculation and check the right tables come back, then
    check that requesting a missing ticker raises DataMissingException.
    """
    engine = TCAEngineImpl(version=tcapy_version)

    request = TCARequest(
        start_date=start_date, finish_date=finish_date, ticker=ticker,
        trade_data_store=trade_data_store, trade_data_database_name=trade_data_database_name,
        market_data_store=market_data_store, market_data_database_table=market_data_database_table,
        trade_order_mapping=trade_order_mapping, use_multithreading=use_multithreading)

    results = engine.calculate_tca(tca_request=request)

    # All three output tables should be present
    for key in ('trade_df', 'sparse_market_trade_df', 'market_df'):
        assert key in results

    # Missing ticker won't return any data, internally a DataMissingException is thrown
    request.ticker = missing_ticker

    raised = False

    try:
        engine.calculate_tca(tca_request=request)
    except DataMissingException:
        raised = True

    assert raised
def test_overlapping_full_detailed_tca_calculation():
    """Tests a detailed TCA calculation works with caching and overlapping dates,
    checking that it has the right tables returned.

    The first run primes the volatile cache; the second run extends the start date
    back by 10 days, so the engine must stitch together cached and freshly fetched
    data and return rows from the earlier period.
    """
    logger = LoggerManager.getLogger(__name__)

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             tca_type='detailed',
                             trade_data_store=trade_data_store, market_data_store=market_data_store,
                             trade_order_mapping=trade_order_mapping, use_multithreading=True)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    # First calculation only warms the cache; its result was previously assigned
    # to a local that was immediately overwritten, so don't keep it
    tca_engine.calculate_tca(tca_request=tca_request)

    logger.info("Running second TCA calculation, extending dates...")

    # Extend sample back by 10 days so the request overlaps the cached window
    tca_request.start_date = pd.Timestamp(start_date) - timedelta(days=10)

    dict_of_df = tca_engine.calculate_tca(tca_request=tca_request)

    sparse_market_trade_df = dict_of_df['sparse_market_' + trade_df_name]

    # Data from the newly-extended (earlier) period must now be present
    assert len(sparse_market_trade_df.index[sparse_market_trade_df.index < '01 Feb 2018']) > 0
def test_invalid_dates_missing_data_tca(fill_market_trade_databases):
    """Tests that a DataMissingException is raised when the TCA request uses invalid
    (out-of-range) dates, and when a valid date range is combined with a ticker for
    which no data exists. Both cases are checked for every combination of tca_type
    and use_multithreading.
    """
    Mediator.get_volatile_cache().clear_cache()  # Clear cache to ensure all test code runs!

    tca_request = TCARequest(
        start_date=start_date, finish_date=finish_date, ticker=valid_ticker_list,
        trade_data_store=trade_data_store, trade_data_database_name=trade_data_database_name,
        market_data_store=market_data_store, market_data_database_table=market_data_database_table,
        trade_order_mapping=trade_order_mapping)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    ## Test invalid dates
    tca_request.start_date = invalid_start_date
    tca_request.finish_date = invalid_finish_date

    for t in tca_type:
        for m in use_multithreading:
            tca_request.use_multithreading = m
            tca_request.tca_type = t

            exception_triggered = False

            try:
                tca_engine.calculate_tca(tca_request=tca_request)
            except DataMissingException:
                exception_triggered = True

            # Assert OUTSIDE the try/except so a calculation which does NOT
            # raise fails the test (previously the assert was unreachable in
            # that case and the test passed silently)
            assert exception_triggered

    ## Test a single valid ticker, but missing data (only one ticker)
    tca_request.start_date = start_date
    tca_request.finish_date = finish_date
    tca_request.ticker = missing_ticker

    for t in tca_type:
        for m in use_multithreading:
            Mediator.get_volatile_cache().clear_cache()  # Clear cache to ensure all test code runs!

            tca_request.use_multithreading = m
            tca_request.tca_type = t

            exception_triggered = False

            try:
                tca_engine.calculate_tca(tca_request=tca_request)
            except DataMissingException:
                exception_triggered = True

            assert exception_triggered
def multiple_ticker_tca_aggregated_example():
    """Example of running aggregated TCA (slippage) across multiple tickers, then
    summarising the results in several ways with ResultsSummary.
    """
    engine = TCAEngineImpl(version=tca_version)

    # Run a TCA computation for multiple tickers, calculating slippage
    request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=mult_ticker,
                         tca_type='aggregated',
                         trade_data_store=trade_data_store, market_data_store=market_data_store,
                         metric_calcs=MetricSlippage(), reporting_currency='EUR')

    trade_df = engine.calculate_tca(request)['trade_df']

    # Aggregate some of the results with the ResultsSummary class
    # (we could have done this within the TCARequest)
    summary = ResultsSummary()

    # Bucket slippage by ticker and report the average
    print(summary.field_bucketing(trade_df, aggregate_by_field='ticker'))

    # Bucket slippage by venue & return the average, weighted by the executed
    # notional in reporting currency (in this case EUR)
    print(summary.field_bucketing(trade_df, aggregate_by_field='venue',
                                  weighting_field='executed_notional_in_reporting_currency'))

    # Bucket slippage by venue and report the (unweighted) average
    print(summary.field_bucketing(trade_df, aggregate_by_field='venue'))
def simplest_tca_single_ticker_example():
    """Detailed TCA example on the trades of a single ticker: computes slippage,
    collects it into a daily timeline, and adds arrival-price and spread-to-mid
    benchmarks for every trade/order.
    """
    engine = TCAEngineImpl(version=tca_version)

    # Specify the TCA request
    request = TCARequest(
        start_date='01 Nov 2017', finish_date='20 Nov 2017', ticker='AUDUSD',
        tca_type='detailed',
        trade_data_store='ms_sql_server', market_data_store='arctic-ncfx',
        metric_calcs=[MetricSlippage(trade_order_list=['trade_df', 'order_df'])],
        results_form=[TimelineResultsForm(metric_name='slippage', by_date='date', scalar=10000.0)],
        benchmark_calcs=[BenchmarkArrival(), BenchmarkMarketSpreadToMid()],
        trade_order_mapping=['trade_df', 'order_df'])

    # Dictionary of dataframes as output from TCA calculation
    dict_of_df = engine.calculate_tca(request)

    print(dict_of_df.keys())

    # Slippage for every trade
    metric_df = dict_of_df['trade_df']['slippage']

    print(metric_df.head(500))
def get_sample_data():
    # Run TCA either against flat CSV test files or the configured data stores,
    # returning (market data, trades, orders) for the ticker
    logger.info("About to load data for " + ticker)

    if use_test_csv:
        # Trade/order/market data loaded from small CSV fixtures on disk
        tca_request = TCARequest(
            start_date=start_date, finish_date=finish_date, ticker=ticker,
            trade_data_store='csv', reporting_currency='EUR',
            market_data_store=os.path.join(folder, 'small_test_market_df.csv.gz'),
            trade_order_mapping={'trade_df': os.path.join(folder, 'small_test_trade_df.csv'),
                                 'order_df': os.path.join(folder, 'small_test_order_df.csv')},
            tca_type=tca_type,
            benchmark_calcs=BenchmarkMarketMid())
    else:
        # Trade/order/market data loaded from the configured database sources
        tca_request = TCARequest(
            start_date=start_date, finish_date=finish_date, ticker=ticker,
            trade_data_store=trade_data_source, reporting_currency='EUR',
            market_data_store=market_data_store,
            trade_order_mapping=['trade_df', 'order_df'],
            tca_type=tca_type,
            benchmark_calcs=BenchmarkMarketMid())

    results = TCAEngineImpl().calculate_tca(tca_request)

    return results[ticker + '_df'], results['trade_df'], results['order_df']
def get_sample_data(ticker_spec=None):
    # Run TCA for the given ticker (defaulting to the module-level one) and
    # return (market data, first trade/order table, second trade/order table)
    if ticker_spec is None:
        ticker_spec = ticker

    logger.info("About to load data for " + ticker_spec)

    request = TCARequest(
        start_date=start_date, finish_date=finish_date, ticker=ticker_spec,
        trade_data_store=trade_data_store, trade_data_database_name=trade_data_database_name,
        reporting_currency=reporting_currency,
        market_data_store=market_data_store, market_data_database_table=market_data_database_table,
        trade_order_mapping=trade_order_mapping, tca_type=tca_type,
        benchmark_calcs=BenchmarkMarketMid(), use_multithreading=use_multithreading)

    results = TCAEngineImpl(version=tcapy_version).calculate_tca(request)

    return (results[ticker_spec + "_df"],
            results[trade_order_list[0]],
            results[trade_order_list[1]])
def test_tag_filter_calculation():
    """Test we can filter trades by broker tag and by venue correctly."""
    broker_filter = TradeOrderFilterTag(tag_value_combinations={'broker_id': 'broker1'})

    request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                         trade_data_store=trade_data_store, reporting_currency=reporting_currency,
                         market_data_store=market_data_store, trade_order_mapping=trade_order_mapping,
                         tca_type=tca_type, trade_order_filter=broker_filter, venue='venue1')

    engine = TCAEngineImpl(version=tcapy_version)

    trade_df = engine.calculate_tca(request)[trade_df_name]

    if trade_df is not None and not trade_df.empty:
        # Note: this only works with the "test" data - it won't work with real data!
        broker_hits = len(trade_df[trade_df['broker_id'] == 'broker1'])
        broker_misses = len(trade_df[trade_df['broker_id'] != 'broker1'])
        venue_hits = len(trade_df[trade_df['venue'] == 'venue1'])
        venue_misses = len(trade_df[trade_df['venue'] != 'venue1'])

        # After filtering, only broker1 trades at venue1 should remain
        assert broker_hits > 0 and broker_misses == 0 and venue_hits > 0 and venue_misses == 0
def test_invalid_tca_inputs():
    """Check exception is thrown with TCAEngine if ticker is not valid (eg. if None,
    or just a random string of 6 letters, or if it includes '/'), and that a
    DateException is thrown when start/finish dates are malformed.
    """
    tca_engine = TCAEngineImpl(version=tcapy_version)

    invalid_tickers = [None, 'KRPAZY', 'EUR/USD']

    # Initialise OUTSIDE the loop so flags from every iteration are kept
    # (previously re-initialised per iteration, so only the last ticker counted)
    ticker_exception_ok = []

    for t in invalid_tickers:
        tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=t)

        try:
            tca_engine.calculate_tca(tca_request)
            ticker_exception_ok.append(False)
        except Exception as e:
            if isinstance(e, ValidationException):
                ticker_exception_ok.append(True)

    ### Check exception is thrown with TCAEngine if start/finish dates are messed up
    date_exception_ok = []

    try:
        tca_request = TCARequest(start_date='01 Mar19', finish_date='01Oc t20', ticker='EURUSD')
        date_exception_ok.append(False)
    except Exception as e:
        if isinstance(e, DateException):
            date_exception_ok.append(True)

    assert any(ticker_exception_ok) and any(date_exception_ok)
def venue_tca_aggregated_example():
    """Example of an aggregated TCA computation on a single ticker, followed by
    calculating the probability distribution function of slippage split by venue
    (weighted by executed notional).
    """
    engine = TCAEngineImpl(version=tca_version)

    request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                         tca_type='aggregated',
                         trade_data_store=trade_data_store, market_data_store=market_data_store,
                         metric_calcs=MetricSlippage())

    dict_of_df = engine.calculate_tca(request)

    # Compute the slippage PDF per venue, weighted by executed notional
    pdf_by_venue = ResultsSummary().field_distribution(
        dict_of_df['trade_df'], metric_name='slippage',
        aggregate_by_field='venue', pdf_only=True,
        weighting_field='executed_notional')

    # Plot PDF of slippage, split up by venue
    Chart(engine='plotly').plot(pdf_by_venue,
                                style=Style(plotly_plot_mode='offline_html',
                                            connect_line_gaps=True))
def example_request_mid_benchmark():
    """Example of market-only analysis (no trade data required): compute the mid
    price, the spread to mid, and resample the market data into 1-minute bars.
    """
    from tcapy.analysis.algos.benchmark import BenchmarkMarketMid, BenchmarkMarketSpreadToMid, BenchmarkMarketResampleOffset

    tca_request = get_tca_request()

    # Allow analysis to be done in a parallel way day by day
    # (note: can't do analysis which requires data outside of the day to do this!)
    tca_request.multithreading_params['splice_request_by_dates'] = True

    # Pipeline: market mid -> spread to mid -> resample to 1 minute bars,
    # taking the mean and TWAP of each minute
    tca_request.benchmark_calcs = [
        BenchmarkMarketMid(),
        BenchmarkMarketSpreadToMid(),
        BenchmarkMarketResampleOffset(market_resample_freq='1', market_resample_unit='min',
                                      price_field='mid', resample_how=['mean', 'twap'])
    ]

    tca_request.use_multithreading = True

    dict_of_df = TCAEngineImpl().calculate_tca(tca_request)

    print(dict_of_df)
def multiple_ticker_tca_aggregated_with_results_example():
    """Example of running aggregated TCA on multiple tickers and wrapping the output
    in a TCAResults object.
    """
    engine = TCAEngineImpl(version=tca_version)

    # Run a TCA computation for multiple tickers, calculating slippage
    request = TCARequest(
        start_date=start_date, finish_date=finish_date, ticker=mult_ticker,
        tca_type='aggregated',
        trade_data_store=trade_data_store, market_data_store=market_data_store,
        results_form=[TimelineResultsForm(metric_name='slippage', by_date='datehour', scalar=10000.0)],
        metric_calcs=MetricSlippage(), reporting_currency='EUR',
        summary_display='candlestick')

    dict_of_df = engine.calculate_tca(request)

    # Show the output of objects
    print(dict_of_df.keys())

    ### Generate TCA report using high level object
    # TCAResults encapsulates the results and is easier to deal with than a raw
    # dictionary of DataFrames
    results = TCAResults(dict_of_df, request)
    results.render_computation_charts()

    print(results.sparse_market_charts.keys())
    print(results.sparse_market.keys())
def test_time_of_day_filter_calculation():
    """Test we can filter trades by time of day/date."""
    trade_order_filter = TradeOrderFilterTimeOfDayWeekMonth(specific_dates=filter_date)

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             trade_data_store=trade_data_store, reporting_currency=reporting_currency,
                             market_data_store=market_data_store, trade_order_mapping=trade_order_mapping,
                             tca_type=tca_type, trade_order_filter=trade_order_filter)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    trade_order_results_df_dict = tca_engine.calculate_tca(tca_request)

    trade_df = trade_order_results_df_dict[trade_df_name]

    if trade_df is not None and not trade_df.empty:
        match_filtered_date = len(trade_df[start_filter_date:finish_filter_date])

        # Trades OUTSIDE the window lie before its start OR after its finish.
        # The original combined the conditions with '&', which can never both
        # hold, so the selection was always empty and unfiltered trades could
        # never be detected.
        non_filtered_date = len(trade_df[(trade_df.index > finish_filter_date)
                                         | (trade_df.index < start_filter_date)])

        # The filter should keep only trades inside the requested window
        assert match_filtered_date > 0 and non_filtered_date == 0
def test_create_tca_report(fill_market_trade_databases):
    """Tests the creation of a TCAResults object and checks that it generates the
    right HTML document via TCAReport.
    """
    Mediator.get_volatile_cache().clear_cache()

    tca_request = TCARequest(
        start_date=start_date, finish_date=finish_date, ticker=ticker,
        trade_data_store=trade_data_store, trade_data_database_name=trade_data_database_name,
        market_data_store=market_data_store, market_data_database_table=market_data_database_table,
        trade_order_mapping=trade_order_mapping,
        metric_calcs=MetricSlippage(),
        results_form=TimelineResultsForm(metric_name='slippage', by_date='datehour'),
        use_multithreading=use_multithreading)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    tca_results = TCAResults(tca_engine.calculate_tca(tca_request=tca_request), tca_request)
    tca_results.render_computation_charts()

    # Timeline results and their rendered charts should both have been produced
    assert tca_results.timeline is not None and tca_results.timeline_charts is not None

    tca_report = TCAReport(tca_results)

    html = tca_report.create_report()

    # Quick check to see that the HTML has been generated, by checking existence of the <head> tag
    assert '<head>' in html
def dataframe_tca_example():
    """Detailed TCA example driven by trades read from a CSV into a DataFrame:
    computes slippage, transient market impact and permanent market impact, plus
    arrival-price and spread-to-mid benchmarks, collecting slippage into a daily
    timeline and an average by venue.
    """
    PLOT = False

    # clear entire cache
    # Mediator.get_volatile_cache(version='pro').clear_cache()

    engine = TCAEngineImpl(version=tca_version)

    trade_order_type = 'trade_df'
    trade_order_list = ['trade_df']

    # Load the trades from CSV and hand them to tcapy as an in-memory DataFrame
    trade_df = DatabaseSourceCSV(
        trade_data_database_csv=csv_trade_order_mapping['trade_df']).fetch_trade_order_data()

    data_frame_trade_order_mapping = OrderedDict([('trade_df', trade_df)])

    # Derive the analysis window and ticker universe from the trades themselves
    start_date = trade_df.index[0]
    finish_date = trade_df.index[-1]

    ticker_list = FXConv().correct_unique_notation_list(trade_df['ticker'].unique().tolist())

    # Specify the TCA request
    tca_request = TCARequest(
        start_date=start_date, finish_date=finish_date, ticker=ticker_list,
        tca_type='aggregated', dummy_market=True,
        trade_data_store='dataframe', market_data_store=market_data_store,
        metric_calcs=[MetricSlippage(trade_order_list=trade_order_list),
                      MetricTransientMarketImpact(transient_market_impact_gap={'ms': 100},
                                                  trade_order_list=trade_order_list),
                      MetricPermanentMarketImpact(permanent_market_impact_gap={'h': 1},
                                                  trade_order_list=trade_order_list)],
        results_form=[TimelineResultsForm(metric_name='slippage', by_date='date'),
                      BarResultsForm(metric_name='slippage', aggregate_by_field='venue')],
        benchmark_calcs=[BenchmarkArrival(), BenchmarkSpreadToMid()],
        trade_order_mapping=data_frame_trade_order_mapping,
        use_multithreading=False)

    # Dictionary of dataframes as output from TCA calculation
    dict_of_df = engine.calculate_tca(tca_request)

    print(dict_of_df.keys())

    # Average slippage per day
    timeline_df = dict_of_df['timeline_' + trade_order_type + '_slippage_by_all']

    # Permanent market impact for every trade
    metric_df = dict_of_df[trade_order_type]['permanent_market_impact']

    print(metric_df.head(500))

    if PLOT:
        from chartpy import Chart, Style

        # plot slippage by timeline
        Chart(engine='plotly').plot(timeline_df)

        # plot market impact (per trade)
        Chart(engine='plotly').plot(metric_df.head(500))
def single_ticker_tca_example_1600LDN_benchmark():
    """Detailed TCA example measuring slippage against a TWAP benchmark computed
    around 16:00 London time, then rendering the results into an HTML report.
    """
    engine = TCAEngineImpl(version=tca_version)

    trade_order_type = 'trade_df'
    trade_order_list = ['trade_df', 'order_df']

    # specify the TCA request
    tca_request = TCARequest(
        start_date=start_date, finish_date=finish_date, ticker=ticker,
        tca_type='detailed', dummy_market=False,
        trade_data_store=trade_data_store, market_data_store=market_data_store,
        metric_calcs=[
            # Slippage for trades/orders, measured against the 16:00 LDN TWAP
            MetricSlippage(trade_order_list=trade_order_list,
                           bid_benchmark='twap1600LDN',
                           ask_benchmark='twap1600LDN',
                           metric_post_fix='twap1600LDN')
        ],
        results_form=[
            # Aggregate the slippage average by date and hour
            TimelineResultsForm(metric_name='slippagetwap1600LDN', by_date='date', scalar=10000.0)
        ],
        benchmark_calcs=[
            # At the arrival price for every trade/order
            BenchmarkArrival(),
            # TWAP over a window around 16:00 London time (2m before to 30s after)
            BenchmarkTWAP(start_time_before_offset={'m': 2},
                          finish_time_after_offset={'s': 30},
                          overwrite_time_of_day='16:00',
                          overwrite_timezone='Europe/London',
                          benchmark_post_fix="1600LDN"),
            # At the spread at the time of every trade/order
            BenchmarkMarketSpreadToMid()
        ],
        extra_lines_to_plot='twap1600LDN',
        trade_order_mapping=trade_order_list,
        use_multithreading=True)

    # Dictionary of dataframes as output from TCA calculation
    dict_of_df = engine.calculate_tca(tca_request)

    print(dict_of_df['trade_df'].head(5))

    tca_results = TCAResults(dict_of_df, tca_request)
    tca_results.render_computation_charts()

    from tcapy.vis.report.computationreport import JinjaRenderer

    tca_report = TCAReport(tca_results, renderer=JinjaRenderer())
    tca_report.create_report(output_filename='test_tca_twap_report.htm',
                             output_format='html', offline_js=False)
def example_request_mid_benchmark():
    """Market-only analysis example: filter market data to 15:00-17:00 London time,
    compute mid and spread-to-mid, resample into 1 minute bars (mean, TWAP and
    absolute range), then average the spread and range by time of day.
    """
    from tcapy.analysis.algos.benchmark import BenchmarkMarketMid, BenchmarkMarketSpreadToMid, BenchmarkMarketResampleOffset, \
        BenchmarkMarketFilter
    from tcapy.analysis.algos.resultsform import BarResultsForm, TimelineResultsForm

    tca_request = get_tca_request()

    # Allow analysis to be done in a parallel approach day by day
    # (note: can't do analysis which requires data outside of the daily chunks to do this!)
    tca_request.multithreading_params['splice_request_by_dates'] = use_multithreading

    # Pipeline: restrict market data to 15:00-17:00 LDN, compute the mid and the
    # spread to mid, then resample to 1 minute bars with mean/TWAP/absolute range
    tca_request.benchmark_calcs = [
        BenchmarkMarketFilter(time_of_day={'start_time': "15:00", 'finish_time': "17:00"},
                              time_zone='Europe/London'),
        BenchmarkMarketMid(),
        BenchmarkMarketSpreadToMid(),
        BenchmarkMarketResampleOffset(market_resample_freq='1', market_resample_unit='min',
                                      price_field='mid',
                                      resample_how=['mean', 'twap', 'absrange'],
                                      dropna=True),
    ]

    # Mean spread to mid for EURUSD by time of day (unweighted), and mean absrange
    # for EURUSD by time of day (London timezone) and month (a proxy for volatility)
    tca_request.results_form = [
        TimelineResultsForm(market_trade_order_list='EURUSD', metric_name='ask_mid_spread',
                            weighting_field=None, by_date='time', scalar=10000.0),
        TimelineResultsForm(market_trade_order_list='EURUSD', metric_name='absrange',
                            weighting_field=None, by_date=['month', 'timeldn'], scalar=10000.0)
    ]

    tca_request.use_multithreading = True

    dict_of_df = TCAEngineImpl().calculate_tca(tca_request)

    # Print out all keys for all the DataFrames returned
    print(dict_of_df.keys())

    # Print market data snapshots
    print(dict_of_df['EURUSD_df'])
    print(dict_of_df['USDJPY_df'])

    print(dict_of_df['EURUSD_df'].columns)
    print(dict_of_df['USDJPY_df'].columns)

    # Print out mean spread by time of day
    print(dict_of_df['timeline_EURUSD_ask_mid_spread_by/mean_time/all'])

    # Plot mean spread by time of day (in London timezone)
    Chart(engine='plotly').plot(dict_of_df['timeline_EURUSD_ask_mid_spread_by/mean_time/all'])

    # Plot absolute range over each minute, averaged by time of day and month
    Chart(engine='plotly').plot(dict_of_df['timeline_EURUSD_absrange_by/mean_month_timeldn/all'],
                                style=Style(title='EURUSD absolute range by time of day (LDN)',
                                            color='Reds', scale_factor=-1))
def tca_example_csv_trade_data_dukascopy():
    """Loads trade/order data from CSV files and market data externally from Dukascopy.

    No databases are used; for occasional TCA this is fine, but for heavy use we
    strongly recommend maintaining an internal tick database, as external downloads
    can be very slow. Here we simply calculate the slippage of every trade and the
    orders above them.
    """
    from tcapy.analysis.tcaengine import TCAEngineImpl
    from tcapy.analysis.tcarequest import TCARequest

    from tcapy.analysis.algos.benchmark import BenchmarkArrival, BenchmarkSpreadToMid
    from tcapy.analysis.algos.metric import MetricSlippage
    from tcapy.analysis.algos.resultsform import TimelineResultsForm

    tca_version = constants.tcapy_version

    engine = TCAEngineImpl(version=tca_version)

    # The test trade/order data is populated between 25 Apr 2017-05 Jun 2017
    # with trades/orders for 'EURUSD', 'USDJPY' and 'EURJPY'
    csv_trade_order_mapping = OrderedDict([
        ('trade_df', os.path.join(folder, 'small_test_trade_df.csv')),
        ('order_df', os.path.join(folder, 'small_test_order_df.csv'))
    ])

    # By specifying use_multithreading=False we avoid dependencies like Celery.
    # Depending on how caching is set up, tcapy may download market data in
    # monthly/weekly chunks and cache them; to force deletion of the cache run:
    #   volatile_cache.clear_cache()
    # Re-running TCA over the same period loads market data from Redis/in-memory,
    # rather than downloading it externally from Dukascopy again.
    request = TCARequest(
        start_date='05 May 2017', finish_date='10 May 2017', ticker=['EURUSD'],
        tca_type='detailed',
        trade_data_store='csv', market_data_store='dukascopy',
        trade_order_mapping=csv_trade_order_mapping,
        metric_calcs=[MetricSlippage()],
        results_form=[TimelineResultsForm(metric_name='slippage', by_date='datehour', scalar=10000.0)],
        benchmark_calcs=[BenchmarkArrival(), BenchmarkSpreadToMid()],
        use_multithreading=False)

    # Dictionary of dataframes as output from TCA calculation
    dict_of_df = engine.calculate_tca(request)

    print(dict_of_df.keys())
def get_sample_data():
    """Load sample market/trade/order data and return the market data table."""
    logger.info("About to load data for " + ticker[0])

    results = TCAEngineImpl().calculate_tca(get_tca_request())

    return results[ticker[0] + '_df']
def get_sample_data():
    # Run TCA with a spread-to-mid benchmark (using fixed bid/ask bp to mid) and
    # return (market data, trades) for the ticker
    from tcapy.analysis.algos.benchmark import BenchmarkMarketSpreadToMid

    logger.info("About to load data for " + ticker)

    request = TCARequest(
        start_date='01 May 2017', finish_date='15 May 2017', ticker=ticker,
        trade_data_store='ms_sql_server', market_data_store=market_data_store,
        benchmark_calcs=[BenchmarkMarketSpreadToMid(bid_mid_bp=bid_mid_bp, ask_mid_bp=ask_mid_bp)],
        trade_order_mapping=['trade_df'], tca_type=tca_type)

    results = TCAEngineImpl().calculate_tca(request)

    return results[ticker + '_df'], results['trade_df']
def compare_multithreading_type():
    """Compares the runtime of a detailed TCA calculation under different
    use_multithreading settings and multithreading parameter combinations.
    """
    engine = TCAEngineImpl(version=tca_version)

    trade_order_list = ['trade_df', 'order_df']

    use_multithreading_list = [True, False]

    # Two caching strategies: splice requests by date vs. not
    multithreading_params_list = [
        {'splice_request_by_dates': True, 'cache_period': 'day',
         'cache_period_trade_data': True, 'cache_period_market_data': True,
         'return_cache_handles_trade_data': True, 'return_cache_handles_market_data': True,
         'parallel_library': 'celery'},
        {'splice_request_by_dates': False, 'cache_period': 'day',
         'cache_period_trade_data': True, 'cache_period_market_data': True,
         'return_cache_handles_trade_data': True, 'return_cache_handles_market_data': True,
         'parallel_library': 'celery'}
    ]

    for use_multithreading in use_multithreading_list:
        for multithreading_params in multithreading_params_list:
            start = time.time()

            # Specify the TCA request
            tca_request = TCARequest(
                start_date=long_start_date, finish_date=long_finish_date, ticker=ticker,
                tca_type='detailed',
                trade_data_store=trade_data_store, market_data_store=market_data_store,
                metric_calcs=[MetricSlippage(trade_order_list=trade_order_list),
                              MetricTransientMarketImpact(transient_market_impact_gap={'ms': 100},
                                                          trade_order_list=trade_order_list),
                              MetricPermanentMarketImpact(permanent_market_impact_gap={'h': 1},
                                                          trade_order_list=trade_order_list)],
                results_form=[TimelineResultsForm(metric_name='slippage', by_date='datehour',
                                                  scalar=10000.0)],
                benchmark_calcs=[BenchmarkArrival(), BenchmarkMarketSpreadToMid()],
                trade_order_mapping=trade_order_list,
                use_multithreading=use_multithreading,
                multithreading_params=multithreading_params)

            # Dictionary of dataframes as output from TCA calculation
            dict_of_df = engine.calculate_tca(tca_request)

            finish = time.time()

            print('Multithreading example: calculated ' + str(round(finish - start, 3))
                  + "s for, use_multithreading = " + str(use_multithreading)
                  + ' multithreading_params = ' + str(multithreading_params))
def test_overlapping_full_detailed_tca_calculation():
    """Tests a detailed TCA calculation works with caching and overlapping dates,
    checking that it has the right tables returned.
    """
    request = TCARequest(
        start_date=start_date, finish_date=finish_date, ticker=ticker,
        tca_type='detailed',
        trade_data_store=trade_data_store, trade_data_database_name=trade_data_database_name,
        market_data_store=market_data_store, market_data_database_table=market_data_database_table,
        trade_order_mapping=trade_order_mapping, use_multithreading=use_multithreading)

    engine = TCAEngineImpl(version=tcapy_version)

    # Extend sample back by 10 days so the request overlaps previously cached dates
    request.start_date = pd.Timestamp(start_date) - timedelta(days=10)

    results = engine.calculate_tca(tca_request=request)

    sparse_market_trade_df = results['sparse_market_trade_df']

    # Data from the extended (earlier) period should be present
    assert len(sparse_market_trade_df.index[sparse_market_trade_df.index < '01 Jun 2017']) > 0
class TCACaller(ComputationCaller, ABC):
    """Abstract class which adds listeners to the GUI buttons in the tcapy application for doing TCA _calculations. At
    initialisation it adds listeners for these buttons and links them to the various text box inputs (where the user
    can specify the various TCA parameters such as start date, finish date, ticker, TCA metrics etc.)

    When a button is pressed it triggers various "calculate" methods, which convert the GUI input, into TCARequest
    objects which are then sent to TCAEngine for doing the actual TCA computation. This analysis is then cached in
    Redis. The completion of this calculation will then trigger a callback from every display component (such as a
    plot or table) which search the cache for the appropriate output to display.

    If a user wishes to programmatically call tcapy, it is recommended they create a TCARequest directly, rather than
    attempting to use TCACaller, and then submit that to a TCAEngine.
    """

    def __init__(self, app, session_manager, callback_manager, glob_volatile_cache, layout, callback_dict=None):
        super(TCACaller, self).__init__(app, session_manager, callback_manager, glob_volatile_cache, layout,
                                        callback_dict=callback_dict)

        # Maps GUI component name suffixes to the type of output each displays
        # (fix: '_dist_trade_order' was listed twice in the original literal; duplicate dict
        # keys silently collapse, so it is declared only once here - behavior is unchanged)
        self._generic_plot_flags = {'_candle_timeline_trade_order': 'candle-timeline-plot',
                                    '_table_trade_order': 'table',
                                    '_dist_trade_order': 'dist-plot',
                                    '_download_link_trade_order': 'download-link',
                                    '_timeline_trade_order': 'timeline-plot',
                                    '_bar_trade_order': 'bar-plot',
                                    '_metric_table_trade_order': 'table'}

        self._generic_line_flags = {
            '_candle_timeline_trade_order': ['candle-timeline-plot-lines-old',
                                             'candle-timeline-plot-lines-relayoutData-old']}

        self._plot_flags = self.create_plot_flags(session_manager, layout)

        # Conversion of the GUI 'reload' dropdown values to booleans
        self._reload_val_dict = {None: False, 'yes': True, 'no': False}

        self._tca_engine = TCAEngineImpl()

    def fill_computation_request_kwargs(self, kwargs, fields):
        """Fills a dictionary with the appropriate parameters which can be consumed by a TCARequest object. This
        involves a large number of object conversions, eg. str based dates to Timestamps, metric names to Metric
        objects etc.

        Parameters
        ----------
        kwargs : dict
            Contains parameters related to TCA analysis

        fields : str (list)
            List of TCA fields we should fill with None if they don't exist in kwargs

        Returns
        -------
        dict
        """
        # Fill the major fields
        kwargs['ticker'] = self._util_func.remove_none_list(kwargs['ticker'])
        kwargs['venue'] = self._util_func.remove_none_list(kwargs['venue'])

        # Convert date strings into Timestamp formats
        kwargs['start_date'] = pd.Timestamp(self._util_func.parse_datetime(str(kwargs['start_date'])))
        kwargs['finish_date'] = pd.Timestamp(self._util_func.parse_datetime(str(kwargs['finish_date'])))

        # An unrecognised (or missing) 'reload' value defaults to False
        # (fix: narrowed the original bare 'except:' - the lookup can only raise KeyError)
        try:
            kwargs['reload'] = self._reload_val_dict[kwargs['reload']]
        except KeyError:
            kwargs['reload'] = False

        if 'event_type' not in kwargs:
            kwargs['event_type'] = 'trade'

        if 'market_data' not in kwargs:
            kwargs['market_data'] = constants.default_market_data_store

        # Fill empty fields with None
        for f in fields:
            if f not in kwargs:
                kwargs[f] = None

        # Add a trade filter for time of day
        if kwargs['filter_time_of_day'] is not None:
            if kwargs['filter_time_of_day'] == 'yes':
                if 'start_time_of_day' in kwargs and 'finish_time_of_day' in kwargs:
                    kwargs = self.add_list_kwargs(
                        kwargs, 'trade_order_filter',
                        TradeOrderFilterTimeOfDayWeekMonth(
                            time_of_day={'start_time': kwargs['start_time_of_day'],
                                         'finish_time': kwargs['finish_time_of_day']}))

        # Filter trades by broker/algo tags, when the user has specified them
        filter_tags = ['broker', 'algo']
        tag_value_combinations = {}

        for f in filter_tags:
            if kwargs[f] is not None:
                tag_value_combinations[f + '_id'] = kwargs[f]

        if len(tag_value_combinations) > 0:
            kwargs = self.add_list_kwargs(
                kwargs, 'trade_order_filter',
                TradeOrderFilterTag(tag_value_combinations=tag_value_combinations))

        # Add metrics which have been specified (including as strings, which will be added with default parameters)
        if kwargs['metric_calcs'] is not None:
            if not isinstance(kwargs['metric_calcs'], list):
                kwargs['metric_calcs'] = [kwargs['metric_calcs']]

            for i, metric in enumerate(kwargs['metric_calcs']):
                kwargs['metric_calcs'][i] = self.fill_metrics(
                    metric, kwargs['metric_trade_order_list'], kwargs['event_type'])

        return kwargs

    def fill_metrics(self, metric, metric_trade_order_list, event_type):
        """Converts a string describing a metric to the appropriate Metric object (with default parameters), which
        can later be consumed by the TCARequest object. Metric objects are passed through unchanged.

        Parameters
        ----------
        metric : str or Metric
            Can be a string (eg. 'slippage', 'transient_market_impact', 'permanent_market_impact') or an actual
            Metric object

        metric_trade_order_list : str (list)
            For which trades/orders should this metric be computed for

        event_type : str
            Trade event type (eg. 'trade', 'cancel', 'cancel/replace' etc)

        Returns
        -------
        Metric
        """
        # If we are given strings of Metric, we need to create the appropriate Metric object in its place
        # NOTE: we'll only have default arguments
        try:
            metric = metric.replace(' ', '_')

            executed_price = 'executed_price'

            # For placements, we wouldn't have an execution price, so closest we can do is the arrival price
            # (= mid for trades)
            if event_type != 'trade':
                executed_price = 'arrival'

            if metric == 'slippage':
                return MetricSlippage(trade_order_list=metric_trade_order_list, executed_price=executed_price)
            elif metric == 'transient_market_impact':
                return MetricTransientMarketImpact(trade_order_list=metric_trade_order_list,
                                                   executed_price=executed_price)
            elif metric == 'permanent_market_impact':
                return MetricPermanentMarketImpact(trade_order_list=metric_trade_order_list,
                                                   executed_price=executed_price)

            ## ADD new metrics you write here (or better to subclass in your version of TCACaller)
        except Exception:
            # Typically an AttributeError when 'metric' is already a Metric object (no str.replace
            # method) - fall through and return it untouched
            pass

        return metric

    def create_computation_request(self, **kwargs):
        """Creates a TCARequest object, populating its fields with those from a kwargs dictionary, which consists of
        parameters such as the start date, finish date, ticker, metrics to be computed, benchmarks to be computed etc.
        The TCARequest object can later be consumed by a TCAEngine when it runs a TCA analysis.

        Parameters
        ----------
        kwargs : dict
            For describing a TCA analysis, such as the start date, finish date, ticker etc.

        Returns
        -------
        TCARequest
        """
        # If the caller already supplied a ready-made TCARequest, use it as-is
        if 'tca_request' in kwargs:
            return kwargs['tca_request']

        # Convert various string/objects into forms which can be accepted by TCARequest
        # NOTE(review): 'results_form' is read below but is not in this None-filled list -
        # assumed to always be present in kwargs from the GUI; confirm with callers
        kwargs = self.fill_computation_request_kwargs(kwargs, ['trade_order_mapping', 'trade_order_filter',
                                                               'benchmark_calcs', 'metric_calcs', 'join_tables',
                                                               'filter_time_of_day', 'broker', 'algo',
                                                               'dummy_market'])

        # Create a TCARequest object which can be consumed by TCAEngine, to run a TCA calculation
        return TCARequest(start_date=kwargs['start_date'], finish_date=kwargs['finish_date'],
                          ticker=kwargs['ticker'], venue=kwargs['venue'], event_type=kwargs['event_type'],
                          market_data_store=kwargs['market_data'], tca_type=kwargs['tca_type'],
                          reload=kwargs['reload'], trade_order_mapping=kwargs['trade_order_mapping'],
                          trade_order_filter=kwargs['trade_order_filter'], metric_calcs=kwargs['metric_calcs'],
                          benchmark_calcs=kwargs['benchmark_calcs'], join_tables=kwargs['join_tables'],
                          results_form=kwargs['results_form'], dummy_market=kwargs['dummy_market'])

    def run_computation_request(self, tca_request):
        """Kicks off the TCA analysis in the underlying TCAEngine using the parameters specified

        Parameters
        ----------
        tca_request : TCARequest
            Governs start date/finish date, _tickers etc. of TCA analysis

        Returns
        -------
        dict
        """
        return self._tca_engine.calculate_tca(tca_request)
def tca_example_csv_trade_data_dukascopy_no_redis():
    """Running TCA calculation but without any Redis caching at all. In practice, this should be avoided, since it
    will likely be much slower, given we'll end up accessing market data/trade data a lot more often from a slow
    source. This is particularly an issue when we're downloading large samples of market data from an external
    source. For very small time periods this might be fine.
    """
    from tcapy.analysis.tcaengine import TCAEngineImpl
    from tcapy.analysis.tcarequest import TCARequest

    from tcapy.analysis.algos.benchmark import BenchmarkArrival, BenchmarkSpreadToMid
    from tcapy.analysis.algos.metric import MetricSlippage

    from tcapy.analysis.algos.resultsform import TimelineResultsForm

    tca_version = constants.tcapy_version

    tca_engine = TCAEngineImpl(version=tca_version)

    # The test trade/order data is populated between 25 Apr 2017-05 Jun 2017
    # with trades/orders for 'EURUSD', 'USDJPY' and 'EURJPY'
    csv_trade_order_mapping = OrderedDict([('trade_df', os.path.join(folder, 'small_test_trade_df.csv')),
                                           ('order_df', os.path.join(folder, 'small_test_order_df.csv'))])

    # Specify the TCA request (note: by specifying multithreading is False, we avoid dependencies like Celery)

    # Depending on how the caching is setup, tcapy may try to download market data in monthly/weekly chunks
    # and cache them. To force deletion of the cache you can run the below
    # volatile_cache.clear_cache()

    # However if you run TCA for the same period, it will load the market data from Redis/in-memory, rather than
    # downloading it externally from Dukascopy
    tca_request = TCARequest(start_date='05 May 2017', finish_date='06 May 2017', ticker=['EURUSD'],
                             tca_type='detailed',
                             trade_data_store='csv', market_data_store='dukascopy',
                             trade_order_mapping=csv_trade_order_mapping,
                             metric_calcs=[MetricSlippage()],
                             results_form=[TimelineResultsForm(metric_name='slippage', by_date='datehour',
                                                               scalar=10000.0)],
                             benchmark_calcs=[BenchmarkArrival(), BenchmarkSpreadToMid()],
                             use_multithreading=False)

    # Disable every cache/parallelism feature, so the whole calculation runs in-process without Redis
    tca_request.multithreading_params = {
        'splice_request_by_dates': False,  # True or False
        'cache_period': 'month',  # month or week

        # Cache trade data in monthly/periodic chunks in Redis (reduces database calls a lot)
        'cache_period_trade_data': False,

        # Cache market data in monthly/periodic chunks in Redis (reduces database calls a lot)
        'cache_period_market_data': False,

        # Return trade data internally as handles (usually necessary for Celery)
        'return_cache_handles_trade_data': False,

        # Return market data internally as handles (usually necessary for Celery)
        'return_cache_handles_market_data': False,

        # Recommend using Celery, which allows us to reuse Python processes
        'parallel_library': 'single'
    }

    # Dictionary of dataframes as output from TCA calculation
    dict_of_df = tca_engine.calculate_tca(tca_request)

    print(dict_of_df.keys())

    market_df = dict_of_df['market_df']

    # Resample the tick market data to minutely bars, keeping the last observation of each minute
    market_df_minute = market_df.resample('1min').last()

    print(market_df_minute)
def test_multithreading_full_basic_tca():
    """Tests that the trade/order and market data output is identical when running with multithreading versus
    single threading, for the 'aggregated', 'detailed' and 'compliance' TCA types.

    Note that we need a running Celery server for multithreading to work (as well as the usual SQL and Arctic
    databases running, if the test_csv option has not been selected). Uses a very large data sample.
    """
    Mediator.get_volatile_cache().clear_cache()  # Clear cache to ensure all test code runs!

    tca_request = TCARequest(start_date=multithreading_start_date, finish_date=multithreading_finish_date,
                             ticker=valid_ticker_list,
                             trade_data_store=trade_data_store, market_data_store=market_data_store,
                             trade_order_mapping=trade_order_mapping)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    tca_type = ['aggregated', 'detailed', 'compliance']
    multithreading = [True, False]

    #### Check the executed prices match in the single and multithreaded cases
    for t in tca_type:
        dict_list = []

        for m in multithreading:
            # Clear cache before each run, so nothing is reused between the two execution modes
            Mediator.get_volatile_cache(version=tcapy_version).clear_cache()

            tca_request.use_multithreading = m
            tca_request.tca_type = t

            dict_list.append(tca_engine.calculate_tca(tca_request=tca_request))

        print("tca_request " + t)

        # dict_list[0] = multithreaded output, dict_list[1] = singlethreaded output
        for k in dict_list[0].keys():
            multi_df = dict_list[0][k]
            single_df = dict_list[1][k]

            if isinstance(single_df, pd.DataFrame) and isinstance(multi_df, pd.DataFrame):
                if 'executed_price' in single_df.columns and 'executed_price' in multi_df.columns:
                    print("tablecomputation " + k)

                    exec_multi = multi_df['executed_price'].dropna()
                    exec_single = single_df['executed_price'].dropna()

                    # Optionally dump the side-by-side comparison to CSV for manual inspection
                    if dump_csv_output:
                        df = pd.DataFrame(exec_multi)
                        df1 = pd.DataFrame(exec_single)
                        df1.columns = [x + '_single' for x in df1.columns]

                        df = df.join(pd.DataFrame(df1), how='outer')
                        df.to_csv(k + "_test.csv")

                        df_large = single_df.join(multi_df, lsuffix='_single', rsuffix='_multi', how='outer')
                        df_large.to_csv(k + "_test_full.csv")

                    # Fix: compare the *absolute* difference - the original signed comparison
                    # (exec_multi - exec_single < eps) would pass for any large negative discrepancy
                    assert all((exec_multi - exec_single).abs() < eps)

                # Only check trade/orders and not any of the other DataFrames returned
                if 'id' in multi_df.columns and 'id' in single_df.columns:
                    # Check we have unique IDs (we should have unique IDs for every event_type trade, but the IDs
                    # will be the same for placements)
                    id_multi = multi_df['id']
                    id_single = single_df['id']

                    # Fix: the original first assertion compared len(id_multi.index) against itself (a
                    # tautology); it should compare the multithreaded vs singlethreaded row counts
                    assert len(id_multi.index) == len(id_single.index)
                    assert len(id_multi.unique()) == len(id_single.index)
                    assert len(id_multi.index) == len(id_single.unique())
def single_ticker_tca_example():
    """Example for doing detailed TCA analysis on the trades of a single ticker, calculating metrics for slippage,
    transient market impact & permanent market impact. It also calculates benchmarks for arrival price of each trade
    and spread to mid.

    Creates a TCAReport which generates standalone HTML and PDF files.

    Also on a lower level it collects results for slippage into a daily timeline and also average by venue (by default
    weights by reporting currency).
    """
    # Note: running Orca might not work in WSL
    PLOT = False

    # clear entire cache
    # Mediator.get_volatile_cache(version='pro').clear_cache()

    tca_engine = TCAEngineImpl(version=tca_version)

    trade_order_type = 'trade_df'
    trade_order_list = ['trade_df']

    # Ensure orca is started, if want to convert to PDF (sometimes you may need to specify the path)
    # Can be slow to start
    if PLOT:
        from chartpy.engine import EnginePlotly
        EnginePlotly().start_orca()  # constants.orca_server_path)

    # Specify the TCA request
    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             tca_type='aggregated',
                             dummy_market=False,
                             trade_data_store=trade_data_store, market_data_store=market_data_store,
                             metric_calcs=[MetricSlippage(trade_order_list=trade_order_list),
                                           MetricTransientMarketImpact(transient_market_impact_gap={'ms': 100},
                                                                       trade_order_list=trade_order_list),
                                           MetricPermanentMarketImpact(permanent_market_impact_gap={'h': 1},
                                                                       trade_order_list=trade_order_list)],
                             results_form=[
                                 # Average slippage aggregated by date/hour
                                 TimelineResultsForm(metric_name='slippage', by_date='datehour', scalar=10000.0),

                                 # Total executed notional (reporting currency) aggregated by date/hour
                                 TimelineResultsForm(metric_name='executed_notional_in_reporting_currency',
                                                     by_date='datehour',
                                                     aggregation_metric='sum'),

                                 # Average slippage per venue
                                 BarResultsForm(metric_name='slippage', aggregate_by_field='venue', scalar=10000.0),

                                 # Distribution of slippage by side
                                 DistResultsForm(metric_name='slippage', aggregate_by_field='side', scalar=10000.0)],
                             benchmark_calcs=[BenchmarkArrival(), BenchmarkSpreadToMid()],
                             trade_order_mapping=trade_order_list, use_multithreading=False)

    # Dictionary of dataframes as output from TCA calculation
    dict_of_df = tca_engine.calculate_tca(tca_request)

    print(dict_of_df['trade_df'])

    print(dict_of_df.keys())

    # Average slippage per date/hour
    timeline_slippage_df = dict_of_df['timeline_' + trade_order_type + '_slippage_by_all']

    # Total executed notional per date/hour
    timeline_executed_notional_df = dict_of_df['timeline_' + trade_order_type
                                               + '_executed_notional_in_reporting_currency_by_all']

    # Permanent market impact for every trade
    metric_df = dict_of_df[trade_order_type]['permanent_market_impact']

    print(metric_df.head(500))

    if PLOT:
        ### Generate TCA report using high level object
        # Use higher level TCAResults object to encapsulate results (easier to deal with than a dictionary of DataFrames)
        tca_results = TCAResults(dict_of_df, tca_request)
        tca_results.render_computation_charts()

        tca_report = TCAReport(tca_results)
        tca_report.create_report(output_filename='test_tca_report.htm', output_format='html')

        # Note: needs plotly orca + wkhtmltopdf installed to render PDFs; best-effort only,
        # so failures are deliberately swallowed here
        try:
            tca_report.create_report(output_filename='test_tca_report.pdf', output_format='pdf')
        except:
            pass

        ### Lower level creation of TCA report
        from chartpy import Chart, Style, Canvas

        # Generate HTML file directly
        Chart(engine='plotly').plot(tca_results.sparse_market_charts['GBPUSD_trade_df'],
                                    style=Style(plotly_plot_mode='offline_html'))

        # Get an HTML string which can be used elsewhere (eg. could use these in other webpages!)
        html_string = Chart(engine='plotly').plot(tca_results.sparse_market_charts['GBPUSD_trade_df'],
                                                  style=Style(plotly_plot_mode='offline_embed_js_div'))

        img_png_string = Chart(engine='plotly').plot(tca_results.sparse_market_charts['GBPUSD_trade_df'],
                                                     style=Style(plotly_plot_mode='offline_image_png_in_html'))

        # Using plain template
        canvas = Canvas([[img_png_string]])
        canvas.generate_canvas(silent_display=True, canvas_plotter='plain', page_title='Cuemacro TCA',
                               render_pdf=False)

        with open('test_tca.html', "w") as text_file:
            text_file.write(html_string)

        ### Plot charts individually

        # Plot slippage by timeline
        Chart(engine='plotly').plot(timeline_slippage_df)

        # Plot total executed notional by timeline
        Chart(engine='plotly').plot(timeline_executed_notional_df)

        # Plot market impact (per trade)
        Chart(engine='plotly').plot(metric_df.head(500))
def dataframe_compliance_tca_example():
    """Get a DataFrame of trades and apply compliance based TCA to it, flagging anomalous trades
    (outside bid/ask) and aggregating notional/slippage by broker.
    """
    tca_engine = TCAEngineImpl(version=tca_version)

    # Assumed spread to mid in basis points for the dummy market data
    spread_to_mid_bp = 0.1
    trade_order_list = ['trade_df']

    # Read in CSV file as a DataFrame
    trade_df = DatabaseSourceCSV(trade_data_database_csv=csv_trade_order_mapping['trade_df']).fetch_trade_order_data()

    data_frame_trade_order_mapping = OrderedDict([('trade_df', trade_df)])

    # Derive the ticker universe and date range from the trades themselves
    ticker_list = FXConv().correct_unique_notation_list(trade_df['ticker'].unique().tolist())

    start_date = trade_df.index[0]
    finish_date = trade_df.index[-1]

    # Specify the TCA request
    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker_list,
                             tca_type='aggregated',
                             dummy_market=True,
                             trade_data_store='dataframe', market_data_store=market_data_store,
                             metric_calcs=[MetricSlippage(trade_order_list=trade_order_list),
                                           MetricTransientMarketImpact(transient_market_impact_gap={'ms': 100},
                                                                       trade_order_list=trade_order_list),
                                           MetricPermanentMarketImpact(permanent_market_impact_gap={'h': 1},
                                                                       trade_order_list=trade_order_list)],
                             benchmark_calcs=[
                                 # add spread to mid fields for every market data spot
                                 BenchmarkSpreadToMid(bid_mid_bp=spread_to_mid_bp, ask_mid_bp=spread_to_mid_bp),
                             ],
                             results_form=[
                                 # Display a table of all the anomalous trades by slippage (ie. outside bid/ask)
                                 TableResultsForm(trade_order_list=['trade_df'], metric_name='slippage',
                                                  filter_by='worst_all',  # Order by the worst slippage
                                                  # Only flag trades outside bid/ask
                                                  tag_value_combinations={'slippage_anomalous': 1.0},
                                                  # Display only side and executed notionals
                                                  keep_fields=['executed_notional_in_reporting_currency', 'side'],
                                                  round_figures_by=None),

                                 # Get the total notional executed by broker (in reporting currency)
                                 BarResultsForm(trade_order_list=['trade_df'],  # trade
                                                aggregate_by_field='broker_id',  # aggregate by broker name
                                                # keep_fields=['executed_notional_in_reporting_currency', 'executed_notional', 'side'],
                                                metric_name='executed_notional_in_reporting_currency',  # analyse notional
                                                aggregation_metric='sum',  # sum the notional
                                                scalar=1,  # no need for a multiplier
                                                round_figures_by=0),  # round to nearest unit

                                 # Get average slippage per broker (weighted by notional)
                                 BarResultsForm(trade_order_list=['trade_df'],
                                                aggregate_by_field='broker_id',
                                                metric_name='slippage',
                                                aggregation_metric='mean',
                                                # keep_fields=['executed_notional_in_reporting_currency', 'executed_notional',
                                                #              'side'],
                                                weighting_field='executed_notional_in_reporting_currency',  # weight results by notional
                                                scalar=10000.0,
                                                round_figures_by=2)
                             ],
                             # Aggregate the results (total notional and slippage) by broker
                             # into a single table for easy display to the user
                             join_tables=[JoinTables(tables_dict={'table_name': 'jointables_broker_id',
                                                                  # fetch the following calculated tables
                                                                  'table_list': [
                                                                      'bar_trade_df_executed_notional_in_reporting_currency_by_broker_id',
                                                                      'bar_trade_df_slippage_by_broker_id'],
                                                                  # append to the columns of each table
                                                                  'column_list': ['notional (rep cur)',
                                                                                  'slippage (bp)']})],
                             trade_order_mapping=data_frame_trade_order_mapping, use_multithreading=False)

    # Dictionary of dataframes as output from TCA calculation
    dict_of_df = tca_engine.calculate_tca(tca_request)

    # print all the output tables
    print(dict_of_df.keys())

    print('All trades')
    print(dict_of_df['trade_df'])

    print('Notional by broker ID')
    print(dict_of_df['bar_trade_df_executed_notional_in_reporting_currency_by_broker_id'])

    print('Notional by broker ID and weighted slippage')
    print(dict_of_df['jointables_broker_id'])

    print('Trades by worst slippage')
    print(dict_of_df['table_trade_df_slippage_by_worst_all'])

    from chartpy import Canvas, Chart

    broker_notional_chart = Chart(engine='plotly',
                                  df=dict_of_df['bar_trade_df_executed_notional_in_reporting_currency_by_broker_id'],
                                  chart_type='bar', style=Style(title='Notional in USD per broker'))

    broker_slippage_chart = Chart(engine='plotly', df=dict_of_df['bar_trade_df_slippage_by_broker_id'],
                                  chart_type='bar', style=Style(title='Slippage by broker (bp)'))

    # Using plain template
    canvas = Canvas([[broker_notional_chart, broker_slippage_chart]])
    canvas.generate_canvas(silent_display=False, canvas_plotter='plain')
def single_ticker_tca_example():
    """Example for doing detailed TCA analysis on the trades of a single ticker, calculating metrics for slippage,
    transient market impact & permanent market impact. It also calculates benchmarks for arrival price of each trade
    and spread to mid.

    Creates a TCAReport which generates standalone HTML and PDF files.

    Also on a lower level it collects results for slippage into a daily timeline and also average by venue (by default
    weights by reporting currency).

    NOTE(review): a function of the same name with an 'aggregated' configuration exists elsewhere in the
    codebase - presumably these live in separate example scripts; confirm they are not in the same module.
    """
    # Note: running Orca might not work in WSL, also when generating Plotly charts, might get an error with WSL, if
    # it doesn't have silent_display=True, as it will try to open a web page in a browser (which isn't supported in WSL1
    # but is in WSL2)
    PLOT = True

    # clear entire cache
    # Mediator.get_volatile_cache().clear_cache()

    tca_engine = TCAEngineImpl(version=tca_version)

    trade_order_type = 'trade_df'
    trade_order_list = ['trade_df', 'order_df']

    # Ensure orca is started, if want to convert to PDF (sometimes you may need to specify the path)
    # Can be slow to start
    if PLOT:
        from chartpy.engine import EnginePlotly
        EnginePlotly().start_orca()  # constants.orca_server_path)

    # Specify the TCA request
    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             tca_type='detailed',
                             dummy_market=False,
                             trade_data_store=trade_data_store, market_data_store=market_data_store,
                             metric_calcs=[
                                 # Calculate the slippage for trades/order
                                 MetricSlippage(trade_order_list=trade_order_list),

                                 # Calculate the shorter and longer term market impact after every trade/order
                                 MetricTransientMarketImpact(transient_market_impact_gap={'ms': 100},
                                                             trade_order_list=trade_order_list),
                                 MetricPermanentMarketImpact(permanent_market_impact_gap={'h': 1},
                                                             trade_order_list=trade_order_list)],
                             results_form=[
                                 # Aggregate the slippage average by date and hour
                                 TimelineResultsForm(metric_name='slippage', by_date='datehour', scalar=10000.0),

                                 # Aggregate the total executed notional in reporting currency (usually USD)
                                 # for every hour
                                 TimelineResultsForm(metric_name='executed_notional_in_reporting_currency',
                                                     by_date='datehour',
                                                     aggregation_metric='sum', scalar=1.0),

                                 # Aggregate the average slippage/transient market impact on trades
                                 # by venue and ticker
                                 HeatmapResultsForm(metric_name=['slippage', 'transient_market_impact'],
                                                    aggregate_by_field=['venue', 'ticker'], scalar=10000.0,
                                                    trade_order_list='trade_df'),

                                 # Aggregate the average slippage on trades by venue
                                 BarResultsForm(metric_name='slippage', aggregate_by_field='venue', scalar=10000.0,
                                                trade_order_list='trade_df'),

                                 # Aggregate the average slippage on trades/orders by broker_id
                                 BarResultsForm(metric_name='slippage', aggregate_by_field='broker_id',
                                                scalar=10000.0),

                                 # Distribution of slippage on trades/orders by side
                                 DistResultsForm(metric_name='slippage', aggregate_by_field='side', scalar=10000.0),

                                 # Create a scatter chart of slippage vs. executed notional
                                 ScatterResultsForm(
                                     scatter_fields=['slippage', 'executed_notional_in_reporting_currency'],
                                     scalar={'slippage': 10000.0})],
                             benchmark_calcs=[
                                 # At the arrival price for every trade/order
                                 BenchmarkArrival(),

                                 # At the spread at the time of every trade/order
                                 BenchmarkMarketSpreadToMid()],
                             trade_order_mapping=trade_order_list, use_multithreading=True)

    # Dictionary of dataframes as output from TCA calculation
    dict_of_df = tca_engine.calculate_tca(tca_request)

    print(dict_of_df['trade_df'])

    print(dict_of_df.keys())

    # Heatmap of slippage and transient market impact broken down by venue and ticker
    heatmap_slippage_market_impact_df = dict_of_df[
        'heatmap_' + trade_order_type + '_slippage#transient_market_impact_by/mean/venue#ticker']

    print(heatmap_slippage_market_impact_df)

    # Average slippage per date/hour
    timeline_slippage_df = dict_of_df['timeline_' + trade_order_type + '_slippage_by/mean_datehour/all']

    # Total executed notional per date/hour
    timeline_executed_notional_df = dict_of_df[
        'timeline_' + trade_order_type + '_executed_notional_in_reporting_currency_by/sum_datehour/all']

    # Permanent market impact for every trade
    metric_df = dict_of_df[trade_order_type]['permanent_market_impact']

    print(metric_df.head(500))

    from tcapy.vis.report.computationreport import JinjaRenderer

    if PLOT:
        ### Generate TCA report using high level object
        # Use higher level TCAResults object to encapsulate results (easier to deal with than a dictionary of DataFrames)
        tca_results = TCAResults(dict_of_df, tca_request)
        tca_results.render_computation_charts()

        tca_report = TCAReport(tca_results, renderer=JinjaRenderer())
        tca_report.create_report(output_filename='test_tca_report.htm', output_format='html', offline_js=False)

        # Note: needs plotly orca + wkhtmltopdf installed to render PDFs; best-effort, so any
        # failure is reported but not raised
        try:
            tca_report.create_report(output_filename='test_tca_report.pdf', output_format='pdf')
        except Exception as e:
            print(str(e))

        ### Lower level creation of TCA report

        ### Plot charts individually

        # Plot slippage by timeline
        Chart(engine='plotly').plot(timeline_slippage_df)

        # Plot total executed notional by timeline
        Chart(engine='plotly').plot(timeline_executed_notional_df)

        # Plot market impact (per trade)
        Chart(engine='plotly').plot(metric_df.head(500))