def _get_market_trade_metrics(self, tca_request_list, dummy_market):
    """Gets the market and trade data, as well as computed metrics on them

    Parameters
    ----------
    tca_request_list : TCARequest (list)
        Requests for multiple TCARequests (eg. for different tickers)

    dummy_market : bool
        Return dummy market data?

    Returns
    -------
    DataFrame (dict), DataFrame (dict)
    """
    loader = Mediator.get_tca_ticker_loader(version=self._version)

    market_df_dict = {}
    trade_order_holder = DataFrameHolder()

    # Process each request in turn: fetch the market/trade data, then compute
    # the metrics for that single ticker
    for request in tca_request_list:
        market_df, trade_order_df_dict = loader.get_market_trade_order_holder(
            request)

        market_df, trade_order_df_list, ticker, trade_order_keys = \
            loader.calculate_metrics_single_ticker(
                (market_df, trade_order_df_dict), request, dummy_market)

        market_df_dict[ticker] = market_df

        trade_order_holder.add_dataframe_dict(
            dict(zip(trade_order_keys, trade_order_df_list)))

    # Unpack the DataFrameHolder into a dictionary (combining the lists of
    # trade, orders etc. into single dataframes) - this may also decompress
    # the trades
    trade_order_results_df_dict = trade_order_holder.get_combined_dataframe_dict()

    return market_df_dict, trade_order_results_df_dict
def _parallel_get_market_trade_metrics(self, tca_request_list, dummy_market):
    """Gets market/trade data and computes metrics on them in parallel.

    Depending on the `parallel_library` setting of the first request, work is
    either fanned out via Celery (chord/group of tasks per date chunk) or run
    serially in-process ('single', mainly for debugging).

    Parameters
    ----------
    tca_request_list : TCARequest (list)
        Requests for multiple TCARequests (eg. for different tickers)

    dummy_market : bool
        Return dummy market data?

    Returns
    -------
    DataFrame (dict), DataFrame (dict)
    """
    logger = LoggerManager.getLogger(__name__)

    market_holder_list = DataFrameHolder()
    trade_order_holder_list = DataFrameHolder()

    # For each currency pair collect the trades and market data, then
    # calculate benchmarks and slippage
    result = []
    keep_looping = True

    # If we have also asked for trades/order, check whether the mapping only
    # contains point-in-time executions ('trade_df'); orders which straddle
    # date boundaries cannot be spliced by date
    if tca_request_list[0].trade_order_mapping is not None:
        point_in_time_executions_only = \
            self._util_func.dict_key_list(
                tca_request_list[0].trade_order_mapping) == ['trade_df']
    else:
        point_in_time_executions_only = True

    parallel_library = tca_request_list[0].multithreading_params[
        'parallel_library']

    if parallel_library == 'single':
        tca_ticker_loader = Mediator.get_tca_ticker_loader(
            version=self._version)

    start_date = tca_request_list[0].start_date
    finish_date = tca_request_list[0].finish_date

    # Parameters for the retry loop: error trapping for Celery, if an event
    # failed (eg. broker communication) retry it up to no_of_tries times
    i = 0
    no_of_tries = 5

    while i < no_of_tries and keep_looping:
        try:
            # For each TCA request kick off a thread/task
            for tca_request_single_ticker in tca_request_list:

                # Split up the request by date (monthly/weekly chunks)
                tca_request_date_split = self._split_tca_request_by_date(
                    tca_request_single_ticker,
                    tca_request_single_ticker.ticker,
                    period=tca_request_single_ticker.multithreading_params[
                        'cache_period'])

                if not(constants.multithreading_params['splice_request_by_dates']) \
                        or tca_request_list[0].tca_type == 'detailed' \
                        or tca_request_list[0].tca_type == 'compliance' \
                        or tca_request_list[0].summary_display == 'candlestick' \
                        or not(point_in_time_executions_only):

                    if 'celery' in parallel_library:
                        # Load all the data for this ticker and THEN
                        # calculate the metrics on it
                        result.append(chord(
                            (get_market_trade_holder_via_celery.s(tca_request_data)
                             for tca_request_data in tca_request_date_split),
                            calculate_metrics_single_ticker_via_celery.s(
                                tca_request_single_ticker,
                                dummy_market)).apply_async())

                    elif parallel_library == 'single':
                        # This is not actually parallel, but is mainly for
                        # debugging purposes
                        for tca_request_s in tca_request_date_split:

                            market_df, trade_order_df_dict = \
                                tca_ticker_loader.get_market_trade_order_holder(
                                    tca_request_s, return_cache_handles=False)

                            market_df, trade_order_df_list, ticker, trade_order_keys = \
                                tca_ticker_loader.calculate_metrics_single_ticker(
                                    (market_df, trade_order_df_dict),
                                    tca_request_s, dummy_market)

                            market_holder_list.add_dataframe(market_df, ticker)
                            trade_order_holder_list.add_dataframe_dict(
                                dict(zip(trade_order_keys,
                                         trade_order_df_list)))

                else:
                    # Otherwise work on parallel chunks by date
                    # doesn't currently work with orders which straddle
                    # day/week/month boundaries but should work with points
                    # in time
                    #
                    # In practice, it's not really much faster than the above
                    # code
                    if 'celery' == parallel_library:
                        # For each ticker/date combination load data and
                        # process chunk (so can do fully in parallel)
                        result.append(group(
                            get_market_trade_holder_and_calculate_metrics_single_ticker_via_celery.s(
                                tca_request_data, dummy_market)
                            for tca_request_data in tca_request_date_split).apply_async())

            # Now combine the results from the parallel operations, if using
            # celery
            if 'celery' in parallel_library:

                # Careful, when the output is empty!
                output = [p.get(timeout=constants.celery_timeout_seconds)
                          for p in result if p is not None]

                # If pipelined/splice_request_by_dates will have two lists so
                # flatten it into one
                output = self._util_func.flatten_list_of_lists(output)

                for market_df, trade_order_df_list, ticker, trade_order_keys in output:
                    market_holder_list.add_dataframe(market_df, ticker)
                    trade_order_holder_list.add_dataframe_dict(
                        dict(zip(trade_order_keys, trade_order_df_list)))

                # Release the (potentially large) intermediate results before
                # combining the holders below
                del result
                del output

            keep_looping = False

        # Domain-specific errors are never retried - re-raise immediately,
        # preserving the original traceback (bare raise)
        except (DateException, TradeMarketNonOverlapException,
                DataMissingException, ErrorWritingOverlapDataException):
            raise

        # Exception likely related to Celery and possibly lack of
        # communication with Redis message broker or Memcached results
        # backend - retry up to no_of_tries times
        except Exception as e:
            if i == no_of_tries - 1:
                err_msg = "Failed with " + parallel_library + " after multiple attempts: " + str(
                    e) + ", " + str(traceback.format_exc())

                raise Exception(err_msg) from e

            i = i + 1

            logger.warning("Failed with " + parallel_library +
                           ", trying again for " + str(i) + " time: " +
                           str(e) + ", " + str(traceback.format_exc()))

    logger.debug("Finished parallel computation")

    # Expand out the DataFrame holders into dictionaries of DataFrames
    market_df_dict = market_holder_list.get_combined_dataframe_dict()
    trade_order_results_df_dict = trade_order_holder_list.get_combined_dataframe_dict(
        start_date=start_date, finish_date=finish_date)

    # TODO add candlestick drawing here for cases when using split threading
    # by date
    trade_order_results_df_dict = self._util_func.remove_keymatch_dict(
        trade_order_results_df_dict, 'market_df_downsampled')

    return market_df_dict, trade_order_results_df_dict