def test_stress_tca(fill_market_trade_databases):
    """Makes several large TCARequests at the same time to stress test the tcapy application, and also to check it
    works with parallel requests (note: you may need to reduce the length of the dataset if your machine has limited
    amounts of RAM). It is possible that when deployed on the web, several users might make simultaneous requests.

    Note, do not use pylibmc, and instead use python-memcached, when using memcached as a result backend. pylibmc is
    not thread-safe, so will come undone if you end up making parallel requests.
    """
    from tcapy.util.swim import Swim

    if not stress_test: return

    # Clear cache to ensure all test code runs!
    Mediator.get_volatile_cache().clear_cache()

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=valid_ticker_list,
                             dummy_market=True,
                             trade_data_store=trade_data_store, trade_data_database_name=trade_data_database_name,
                             market_data_store=market_data_store,
                             market_data_database_table=market_data_database_table,
                             trade_order_mapping=trade_order_mapping, use_multithreading=True, tca_type='aggregated')

    # Kick off several simultaneous large TCA requests
    request_no = 2

    tca_request_list = []

    for i in range(0, request_no):
        tca_request_list.append(TCARequest(tca_request=tca_request))

    tca_engine = TCAEngineImpl(version=tcapy_version)

    swim = Swim(parallel_library='thread')
    pool = swim.create_pool(thread_no=len(tca_request_list))

    result = []

    for item in tca_request_list:
        result.append(pool.apply_async(tca_engine.calculate_tca, args=(item,)))

    output = [p.get() for p in result]

    swim.close_pool(pool, True)

    assert len(output) == len(tca_request_list)

    # Check that several DataFrames exist in the results
    for trade_order_results_df_dict in output:
        assert 'trade_df' in trade_order_results_df_dict.keys()
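def _example_threadpool_stress(tca_engine, tca_request_list):
    """A minimal sketch (not part of the original tests) of the same fan-out as test_stress_tca, written with the
    standard library's ThreadPool rather than tcapy's Swim wrapper; tca_engine and tca_request_list are assumed to
    be constructed as in the test above."""
    from multiprocessing.pool import ThreadPool

    pool = ThreadPool(processes=len(tca_request_list))

    # Submit every request without blocking, then gather all the results
    async_results = [pool.apply_async(tca_engine.calculate_tca, args=(req,)) for req in tca_request_list]
    output = [r.get() for r in async_results]

    pool.close(); pool.join()

    return output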
def create_computation_request(self, **kwargs):
    """Creates a TCARequest object, populating its fields with those from a kwargs dictionary, consisting of
    parameters such as the start date, finish date, ticker, metrics to be computed, benchmarks to be computed etc.
    The TCARequest object can later be consumed by a TCAEngine when it runs a TCA analysis.

    Parameters
    ----------
    kwargs : dict
        For describing a TCA analysis, such as the start date, finish date, ticker etc.

    Returns
    -------
    TCARequest
    """
    if 'tca_request' in kwargs.keys():
        return kwargs['tca_request']

    # Convert various strings/objects into forms which can be accepted by TCARequest
    kwargs = self.fill_computation_request_kwargs(kwargs,
        ['trade_order_mapping', 'trade_order_filter', 'benchmark_calcs', 'metric_calcs', 'join_tables',
         'filter_time_of_day', 'broker', 'algo', 'dummy_market'])

    # Create a TCARequest object which can be consumed by TCAEngine, to run a TCA calculation
    return TCARequest(start_date=kwargs['start_date'], finish_date=kwargs['finish_date'], ticker=kwargs['ticker'],
                      venue=kwargs['venue'], event_type=kwargs['event_type'],
                      market_data_store=kwargs['market_data'],
                      tca_type=kwargs['tca_type'], reload=kwargs['reload'],
                      trade_order_mapping=kwargs['trade_order_mapping'],
                      trade_order_filter=kwargs['trade_order_filter'],
                      metric_calcs=kwargs['metric_calcs'], benchmark_calcs=kwargs['benchmark_calcs'],
                      join_tables=kwargs['join_tables'], results_form=kwargs['results_form'],
                      dummy_market=kwargs['dummy_market'])
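def _example_create_computation_request(factory):
    """A minimal usage sketch (not from the original source): 'factory' is assumed to be whichever object exposes
    create_computation_request above, and every keyword value here is purely illustrative rather than a tested
    default."""
    tca_request = factory.create_computation_request(
        start_date='01 Jan 2020', finish_date='01 Feb 2020', ticker='EURUSD', venue='All', event_type='trade',
        market_data='arctic-ncfx', tca_type='aggregated', reload=False,
        trade_order_mapping=None, trade_order_filter=None, metric_calcs='slippage', benchmark_calcs='mid',
        join_tables=None, results_form=None, dummy_market=False,
        filter_time_of_day=None, broker='All', algo='All')

    return tca_request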
def test_invalid_dates_missing_data_tca():
    """Tests that a DataMissingException is triggered when TCA is run with invalid dates, or with a ticker for which
    no data exists, for detailed, aggregated and compliance TCA, both with multithreading and singlethreading.

    Note that we need a running Celery server for multithreading to work (as well as the usual SQL and Arctic
    databases running, if the test_csv option has not been selected).
    """
    Mediator.get_volatile_cache().clear_cache()  # Clear cache to ensure all test code runs!

    tca_request = TCARequest(start_date=large_start_date, finish_date=large_finish_date, ticker=valid_ticker_list,
                             trade_data_store=trade_data_store, market_data_store=market_data_store,
                             trade_order_mapping=trade_order_mapping)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    tca_type = ['detailed', 'aggregated', 'compliance']
    multithreading = [True, False]

    ## Test invalid dates
    tca_request.start_date = invalid_start_date
    tca_request.finish_date = invalid_finish_date

    for t in tca_type:
        for m in multithreading:
            Mediator.get_volatile_cache().clear_cache()  # Clear cache to ensure all test code runs!

            tca_request.use_multithreading = m
            tca_request.tca_type = t

            exception_triggered = True

            try:
                dict_of_df_invalid = tca_engine.calculate_tca(tca_request=tca_request)

                exception_triggered = False
            except DataMissingException:
                pass

            # Must assert outside the try block, otherwise a run which never raises would silently pass
            assert exception_triggered

    ## Test a single valid ticker, but where the data is missing (only one ticker)
    tca_request.start_date = start_date
    tca_request.finish_date = finish_date
    tca_request.ticker = missing_ticker

    for t in tca_type:
        for m in multithreading:
            Mediator.get_volatile_cache(version=tcapy_version).clear_cache()  # Clear cache to ensure all test code runs!

            tca_request.use_multithreading = m
            tca_request.tca_type = t

            exception_triggered = True

            try:
                dict_of_df_missing_ticker = tca_engine.calculate_tca(tca_request=tca_request)

                exception_triggered = False
            except DataMissingException:
                pass

            assert exception_triggered
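def _example_missing_data_check(tca_engine, tca_request):
    """A minimal sketch (not from the original source) of the same missing-data check expressed with pytest.raises,
    which fails loudly if DataMissingException is *not* thrown; assumes pytest is installed and that tca_request
    points at invalid dates or a missing ticker."""
    import pytest

    with pytest.raises(DataMissingException):
        tca_engine.calculate_tca(tca_request=tca_request)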
def test_multithreading_full_basic_tca():
    """Tests that the trade/order and market data output is identical for multithreading versus singlethreading, for
    detailed, aggregated and compliance TCA. Note that we need a running Celery server for multithreading to work
    (as well as the usual SQL and Arctic databases running, if the test_csv option has not been selected). Uses a
    very large data sample.
    """
    Mediator.get_volatile_cache().clear_cache()  # Clear cache to ensure all test code runs!

    tca_request = TCARequest(start_date=multithreading_start_date, finish_date=multithreading_finish_date,
                             ticker=valid_ticker_list, trade_data_store=trade_data_store,
                             market_data_store=market_data_store, trade_order_mapping=trade_order_mapping)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    tca_type = ['aggregated', 'detailed', 'compliance']
    multithreading = [True, False]

    #### Check the executed prices match in the single and multithreaded cases
    for t in tca_type:
        dict_list = []

        for m in multithreading:
            Mediator.get_volatile_cache(version=tcapy_version).clear_cache()  # Clear cache to ensure all test code runs!

            tca_request.use_multithreading = m
            tca_request.tca_type = t

            dict_list.append(tca_engine.calculate_tca(tca_request=tca_request))

        print("tca_request " + t)

        for k in dict_list[0].keys():
            multi_df = dict_list[0][k]
            single_df = dict_list[1][k]

            if isinstance(single_df, pd.DataFrame) and isinstance(multi_df, pd.DataFrame):
                if 'executed_price' in single_df.columns and 'executed_price' in multi_df.columns:
                    print("table computation " + k)

                    exec_multi = multi_df['executed_price'].dropna()
                    exec_single = single_df['executed_price'].dropna()

                    if dump_csv_output:
                        df = pd.DataFrame(exec_multi)
                        df1 = pd.DataFrame(exec_single)
                        df1.columns = [x + '_single' for x in df1.columns]

                        df = df.join(df1, how='outer')
                        df.to_csv(k + "_test.csv")

                        df_large = single_df.join(multi_df, lsuffix='_single', rsuffix='_multi', how='outer')
                        df_large.to_csv(k + "_test_full.csv")

                    assert all((exec_multi - exec_single).abs() < eps)

                # Only check trade/orders and not any of the other DataFrames returned
                if 'id' in multi_df.columns and 'id' in single_df.columns:
                    # Check we have unique IDs for every event_type trade (the IDs will be the same for placements)
                    id_multi = multi_df['id']
                    id_single = single_df['id']

                    assert len(id_multi.index) == len(id_single.index)
                    assert len(id_multi.unique()) == len(id_single.index)
                    assert len(id_multi.index) == len(id_single.unique())
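def _example_compare_executed_prices(multi_df, single_df, eps=1e-8):
    """A minimal sketch (not from the original source) of the executed price comparison done with pandas' own
    testing helper, which reports the first mismatching row on failure; assumes pandas >= 1.1 for the atol
    parameter, and eps here is an illustrative tolerance."""
    import pandas as pd

    exec_multi = multi_df['executed_price'].dropna()
    exec_single = single_df['executed_price'].dropna()

    # check_exact=False with atol tolerates tiny floating point differences between the two code paths
    pd.testing.assert_series_equal(exec_multi, exec_single, check_exact=False, atol=eps)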
# See the License for the specific language governing permissions and limitations under the License.
#

if __name__ == '__main__':

    # Need this for WINDOWS machines, to ensure multiprocessing stuff works properly
    from tcapy.util.swim import Swim; Swim()

    from tcapy.data.volatilecache import VolatileRedis

    import datetime
    from datetime import timedelta

    # First delete the Redis cache
    volatile = VolatileRedis()
    volatile.clear_cache()

    from tcapy.analysis.tcaengine import TCARequest, TCAEngineImpl

    tca_engine = TCAEngineImpl()

    # Do a massive TCA computation for all currency pairs for the past year;
    # this will cache all the data in Redis, which can be used later
    finish_date = datetime.datetime.utcnow().date() - timedelta(days=1)
    start_date = finish_date - timedelta(days=252)

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker='All')

    tca_engine.calculate_tca(tca_request)
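def _example_check_redis_cache():
    """A minimal sketch (not part of the original script) of checking that the warm-up run above actually populated
    Redis, using the redis-py client directly; the host and port are illustrative defaults."""
    import redis

    r = redis.Redis(host='localhost', port=6379)

    # dbsize() returns the number of keys in the current Redis database; a freshly warmed cache should be non-empty
    print('Keys cached in Redis: ' + str(r.dbsize()))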