def example_calculate_slippage_with_bid_mid_spreads():
    """Calculate the slippage for trades given market data as a benchmark
    """
    from tcapy.analysis.algos.metric import MetricSlippage

    # Grab the sample market/trade data and run the slippage metric over it
    market_df, trade_df = get_sample_data()

    slippage_metric = MetricSlippage()
    trade_df, _ = slippage_metric.calculate_metric(trade_df, market_df)

    print(trade_df)
def test_create_tca_report(fill_market_trade_databases):
    """Tests the creation of a TCAResults object, checking that it can render
    charts and that the generated report document looks right.
    """
    Mediator.get_volatile_cache().clear_cache()

    request = TCARequest(
        start_date=start_date, finish_date=finish_date, ticker=ticker,
        trade_data_store=trade_data_store,
        trade_data_database_name=trade_data_database_name,
        market_data_store=market_data_store,
        market_data_database_table=market_data_database_table,
        trade_order_mapping=trade_order_mapping,
        metric_calcs=MetricSlippage(),
        results_form=TimelineResultsForm(metric_name='slippage', by_date='datehour'),
        use_multithreading=use_multithreading)

    engine = TCAEngineImpl(version=tcapy_version)

    # Wrap the raw TCA output in a TCAResults object and render its charts
    results = TCAResults(engine.calculate_tca(tca_request=request), request)
    results.render_computation_charts()

    assert results.timeline is not None and results.timeline_charts is not None

    report = TCAReport(results)
    html = report.create_report()

    # Quick check to see that the html has been generated by checking existance of HTML head _tag
    assert '<head>' in html
def test_results_form_average():
    """Tests averages are calculated correctly by ResultsForm, compared to a direct calculation

    Fix: the original assertions compared ``a - b < eps`` without taking the
    absolute value, so a large *negative* discrepancy would always pass.
    Both comparisons are now wrapped in ``abs()``.
    """
    market_df, trade_df, order_df = get_sample_data()

    trade_df, _ = MetricSlippage().calculate_metric(
        trade_order_df=trade_df, market_df=market_df,
        bid_benchmark='mid', ask_benchmark='mid')

    results_form = BarResultsForm(
        trade_order_list=[trade_df_name], metric_name='slippage',
        aggregation_metric='mean',
        aggregate_by_field=['ticker', 'venue'], scalar=10000.0,
        weighting_field='executed_notional_in_reporting_currency')

    results_df = results_form.aggregate_results(
        trade_order_df=trade_df, market_df=market_df,
        trade_order_name=trade_df_name)

    slippage_average = float(results_df[0][0].values[0])

    # Directly calculate (notional-weighted) slippage in basis points
    def grab_slippage(trade_df):
        return 10000.0 * ((trade_df['slippage'] * trade_df['executed_notional_in_reporting_currency']).sum() \
            / trade_df['executed_notional_in_reporting_currency'].sum())

    slippage_average_comp = grab_slippage(trade_df)

    # Check the average slippage (abs() so negative discrepancies also fail)
    assert abs(slippage_average - slippage_average_comp) < eps

    slippage_average_venue = results_df[1][0]['venue'][venue_filter]
    slippage_average_venue_comp = grab_slippage(trade_df[trade_df['venue'] == venue_filter])

    # Check the average slippage by venue (abs() so negative discrepancies also fail)
    assert abs(slippage_average_venue - slippage_average_venue_comp) < eps
def tca_example_csv_trade_data_dukascopy():
    """Loads up trade/order data from CSV files and market data externally from Dukascopy. Does not use any databases, if
    you rarely use TCA, this is fine. However, for heavy use of TCA, we strongly recommend maintaining an internal tick
    database, as external downloading of data can be very slow.

    In this case we are simply calculating the slippage of every trade and orders above them.
    """
    from tcapy.analysis.tcaengine import TCAEngineImpl
    from tcapy.analysis.tcarequest import TCARequest

    from tcapy.analysis.algos.benchmark import BenchmarkArrival, BenchmarkSpreadToMid
    from tcapy.analysis.algos.metric import MetricSlippage
    from tcapy.analysis.algos.resultsform import TimelineResultsForm

    engine = TCAEngineImpl(version=constants.tcapy_version)

    # The test trade/order data is populated between 25 Apr 2017-05 Jun 2017
    # with trades/orders for 'EURUSD', 'USDJPY' and 'EURJPY'
    csv_trade_order_mapping = OrderedDict([
        ('trade_df', os.path.join(folder, 'small_test_trade_df.csv')),
        ('order_df', os.path.join(folder, 'small_test_order_df.csv'))])

    # Specify the TCA request (note: by specifying multithreading as False, we avoid dependencies like Celery)
    #
    # Depending on how the caching is setup, tcapy may try to download market data in monthly/weekly chunks and
    # cache them. To force deletion of the cache you can run the below
    # volatile_cache.clear_cache()
    #
    # However if you run TCA for the same period, it will load the market data from Redis/in-memory, rather than
    # downloading it externally from Dukascopy
    tca_request = TCARequest(
        start_date='05 May 2017', finish_date='10 May 2017',
        ticker=['EURUSD'], tca_type='detailed',
        trade_data_store='csv', market_data_store='dukascopy',
        trade_order_mapping=csv_trade_order_mapping,
        metric_calcs=[MetricSlippage()],
        results_form=[TimelineResultsForm(metric_name='slippage', by_date='datehour', scalar=10000.0)],
        benchmark_calcs=[BenchmarkArrival(), BenchmarkSpreadToMid()],
        use_multithreading=False)

    # Dictionary of dataframes as output from TCA calculation
    dict_of_df = engine.calculate_tca(tca_request)

    print(dict_of_df.keys())
def test_metric_calculation(fill_market_trade_databases):
    """Tests slippage calculation on a test set of market and trade data

    Fixes:
    - ``ind_list`` was ``[0, 1, 2, -2 - 1]`` (a missing comma), which evaluates
      to ``[0, 1, 2, -3]`` — i.e. one intended sample point was silently
      dropped. Restored to ``[0, 1, 2, -2, -1]``.
    - ``market_df['mid'][mid_index]`` used ``[]`` with an integer position on a
      datetime-indexed Series; replaced with explicit positional ``.iloc``,
      consistent with the market-impact section below.
    """
    market_df, trade_df, order_df = get_sample_data()

    #### Calculate slippage

    # Offset the market data slightly so trade timestamps don't align exactly
    market_df.index = market_df.index + timedelta(milliseconds=5)

    # Add a mid point (in case it doesn't exist)
    market_df = BenchmarkMarketSpreadToMid().calculate_benchmark(market_df=market_df)

    trade_df, _ = MetricSlippage().calculate_metric(
        trade_order_df=trade_df, market_df=market_df,
        bid_benchmark='mid', ask_benchmark='mid')

    # a selection of points to try (first few and last few trades)
    ind_list = [0, 1, 2, -2, -1]

    for i in ind_list:
        # Now replicate slippage calculation from first principles (get the last available point if no match)
        mid_index = market_df['mid'].index.get_loc(trade_df.index[i], method='ffill')

        trade = trade_df['executed_price'][i]
        side = trade_df['side'][i]

        slippage = trade_df['slippage'][i]
        market_slippage = trade_df['slippage_benchmark'][i]

        # Positional lookup of the ffill'd mid point
        market = market_df['mid'].iloc[mid_index]

        # Do slippage calculation for comparison with our method
        slippage_comp = -side * (trade - market)

        # Check that the 'slippage' column exists and is consistent
        assert ('slippage' in trade_df.columns
                and abs(slippage - slippage_comp) < eps)

    ### Check anomalous trade identification
    market_df, trade_df, order_df = get_sample_data()

    market_df.index = market_df.index + timedelta(milliseconds=10)

    # Force spread to mid to be 0.25bp
    anomalous_spread_to_mid_bp = 0.25

    market_df = BenchmarkMarketSpreadToMid().calculate_benchmark(
        market_df=market_df,
        bid_mid_bp=anomalous_spread_to_mid_bp,
        ask_mid_bp=anomalous_spread_to_mid_bp,
        overwrite_bid_ask=True)

    trade_df, _ = MetricSlippage().calculate_metric(trade_order_df=trade_df, market_df=market_df)

    anomalous_metric = trade_df[trade_df['slippage_anomalous'] == 1]
    anomalous_comparison = trade_df[
        trade_df['slippage'] <= -(anomalous_spread_to_mid_bp / (100.0 * 100.0))]

    # Now test if the correct trades have been identified as anomalous
    assert_frame_equal(anomalous_metric[['slippage_anomalous']],
                       anomalous_comparison[['slippage_anomalous']])

    #### Calculate market impact (using bid/ask)
    market_df, trade_df, order_df = get_sample_data()

    trade_df, _ = MetricTransientMarketImpact(transient_market_impact_gap={
        'ms': 1250
    }).calculate_metric(trade_order_df=trade_df, market_df=market_df,
                        bid_benchmark='bid', ask_benchmark='ask')

    for i in ind_list:
        # Now replicate transient market impact calculation from first principles
        # (get the NEXT available point if not available)
        time_to_search = trade_df.index[i] + timedelta(milliseconds=1250)
        index = market_df.index.get_loc(time_to_search, method='bfill')
        index_time = market_df.index[index]

        trade = trade_df['executed_price'][i]
        side = trade_df['side'][i]

        if 'bid' in market_df.columns and 'ask' in market_df.columns:
            # Buys hit the ask, sells hit the bid
            if side == 1:
                market = market_df.iloc[index]['ask']
            elif side == -1:
                market = market_df.iloc[index]['bid']
        else:
            market = market_df.iloc[index]['mid']

        market_transient_impact_benchmark = trade_df['transient_market_impact_benchmark'][i]
        transient_market_impact = trade_df['transient_market_impact'][i]

        # do transient market impact calculation for comparison with our method
        transient_market_impact_comp = side * (trade - market)

        # check that the 'transient_market_impact' column exists and is consistent
        assert ('transient_market_impact' in trade_df.columns) and \
            (abs(transient_market_impact - transient_market_impact_comp) < eps)
def tca_example_csv_trade_data_dukascopy_no_redis():
    """Running TCA calculation but without any Redis caching at all. In practice, this should be avoided, since it will
    likely be much slower, given we'll end up accessing market data/trade data a lot more often from a slow source.

    This is particularly an issue when we're downloading large samples of market data from an external source. For very
    small time periods this might be fine.
    """
    from tcapy.analysis.tcaengine import TCAEngineImpl
    from tcapy.analysis.tcarequest import TCARequest

    from tcapy.analysis.algos.benchmark import BenchmarkArrival, BenchmarkSpreadToMid
    from tcapy.analysis.algos.metric import MetricSlippage
    from tcapy.analysis.algos.resultsform import TimelineResultsForm

    engine = TCAEngineImpl(version=constants.tcapy_version)

    # The test trade/order data is populated between 25 Apr 2017-05 Jun 2017
    # with trades/orders for 'EURUSD', 'USDJPY' and 'EURJPY'
    csv_trade_order_mapping = OrderedDict([
        ('trade_df', os.path.join(folder, 'small_test_trade_df.csv')),
        ('order_df', os.path.join(folder, 'small_test_order_df.csv'))])

    # Specify the TCA request (note: by specifying multithreading as False, we avoid dependencies like Celery)
    #
    # Depending on how the caching is setup, tcapy may try to download market data in monthly/weekly chunks and
    # cache them. To force deletion of the cache you can run the below
    # volatile_cache.clear_cache()
    #
    # However if you run TCA for the same period, it will load the market data from Redis/in-memory, rather than
    # downloading it externally from Dukascopy
    tca_request = TCARequest(
        start_date='05 May 2017', finish_date='06 May 2017',
        ticker=['EURUSD'], tca_type='detailed',
        trade_data_store='csv', market_data_store='dukascopy',
        trade_order_mapping=csv_trade_order_mapping,
        metric_calcs=[MetricSlippage()],
        results_form=[TimelineResultsForm(metric_name='slippage', by_date='datehour', scalar=10000.0)],
        benchmark_calcs=[BenchmarkArrival(), BenchmarkSpreadToMid()],
        use_multithreading=False)

    # Disable every form of caching/handle passing and run everything in a single process
    tca_request.multithreading_params = {
        'splice_request_by_dates': False,  # True or False
        'cache_period': 'month',  # month or week

        # Cache trade data in monthly/periodic chunks in Redis (reduces database calls a lot)
        'cache_period_trade_data': False,

        # Cache market data in monthly/periodic chunks in Redis (reduces database calls a lot)
        'cache_period_market_data': False,

        # Return trade data internally as handles (usually necessary for Celery)
        'return_cache_handles_trade_data': False,

        # Return market data internally as handles (usually necessary for Celery)
        'return_cache_handles_market_data': False,

        # Recommend using Celery, which allows us to reuse Python processes
        'parallel_library': 'single'
    }

    # Dictionary of dataframes as output from TCA calculation
    dict_of_df = engine.calculate_tca(tca_request)

    print(dict_of_df.keys())

    market_df = dict_of_df['market_df']

    # Downsample the tick data to minute bars for a quick look
    market_df_minute = market_df.resample('1min').last()

    print(market_df_minute)