示例#1
0
    def _get_market_trade_metrics(self, tca_request_list, dummy_market):
        """Loads market and trade/order data for every request in turn and computes the metrics on them

        Each request is processed sequentially: the data is fetched through the TCA ticker loader and the metrics
        are then calculated on it. Market data is stored per ticker (a later request for the same ticker replaces
        the earlier entry), whilst trade/order DataFrames are accumulated and combined at the end.

        Parameters
        ----------
        tca_request_list : TCARequest (list)
            Requests for multiple TCARequests (eg. for different tickers)

        dummy_market : bool
            Return dummy market data?

        Returns
        -------
        DataFrame (dict), DataFrame (dict)
            Market data keyed by ticker, and the combined trade/order DataFrames
        """

        loader = Mediator.get_tca_ticker_loader(version=self._version)

        trade_order_holder = DataFrameHolder()
        market_by_ticker = {}

        for request in tca_request_list:
            # Fetch the raw market and trade/order data for this request
            raw_market_df, raw_trade_order_df_dict = \
                loader.get_market_trade_order_holder(request)

            # Compute the metrics on the data which has just been loaded
            market_df, trade_order_dfs, ticker, trade_order_names = \
                loader.calculate_metrics_single_ticker(
                    (raw_market_df, raw_trade_order_df_dict), request,
                    dummy_market)

            market_by_ticker[ticker] = market_df

            # Pair every trade/order name with its DataFrame before storing
            named_trade_order_dfs = {
                name: df
                for name, df in zip(trade_order_names, trade_order_dfs)
            }
            trade_order_holder.add_dataframe_dict(named_trade_order_dfs)

        # Collapse the accumulated lists of trades, orders etc. into a single DataFrame per key
        # (this may also decompress the trades)
        combined_trade_order_df_dict = \
            trade_order_holder.get_combined_dataframe_dict()

        return market_by_ticker, combined_trade_order_df_dict
示例#2
0
    def _parallel_get_market_trade_metrics(self, tca_request_list,
                                           dummy_market):
        """Gets the market and trade data, as well as computed metrics on them, splitting every request into date
        chunks and dispatching the work through the configured parallel library

        The first request in the list supplies the settings shared by the whole batch (trade/order mapping,
        parallel library, TCA type, summary display and the start/finish dates used when combining the results),
        hence the list is expected to be non-empty.

        The whole computation is attempted up to 5 times. Every attempt starts from empty result containers, so
        that nothing collected during a failed attempt is carried over into the next one.

        Parameters
        ----------
        tca_request_list : TCARequest (list)
            Requests for multiple TCARequests (eg. for different tickers)

        dummy_market : bool
            Return dummy market data?

        Returns
        -------
        DataFrame (dict), DataFrame (dict)

        Raises
        ------
        DateException, TradeMarketNonOverlapException, DataMissingException, ErrorWritingOverlapDataException
            Re-raised immediately, without any retry

        Exception
            If every attempt failed with any other exception
        """
        logger = LoggerManager.getLogger(__name__)

        first_request = tca_request_list[0]

        # If we have also asked for trades/order
        if first_request.trade_order_mapping is not None:
            point_in_time_executions_only = \
                self._util_func.dict_key_list(first_request.trade_order_mapping) == ['trade_df']
        else:
            point_in_time_executions_only = True

        parallel_library = first_request.multithreading_params[
            'parallel_library']

        if parallel_library == 'single':
            # from tcapy.analysis.tcatickerloaderimpl import TCATickerLoaderImpl
            tca_ticker_loader = Mediator.get_tca_ticker_loader(
                version=self._version)

        start_date = first_request.start_date
        finish_date = first_request.finish_date

        # Parameters for the loop
        no_of_tries = 5

        # Error trapping for Celery, if have failed event retry it
        for attempt in range(1, no_of_tries + 1):

            # Start every attempt from a clean slate, otherwise async results (or DataFrames already added in
            # 'single' mode) from a failed attempt would be collected again on the retry
            market_holder_list = DataFrameHolder()
            trade_order_holder_list = DataFrameHolder()

            # For each currency pair select collect the trades and market data, then calculate benchmarks and slippage
            result = []

            try:
                # For each TCA request kick off a thread
                for tca_request_single_ticker in tca_request_list:

                    # Split up the request by date (monthly/weekly chunks)
                    tca_request_date_split = self._split_tca_request_by_date(
                        tca_request_single_ticker,
                        tca_request_single_ticker.ticker,
                        period=tca_request_single_ticker.
                        multithreading_params['cache_period'])

                    if not(constants.multithreading_params['splice_request_by_dates']) \
                                or first_request.tca_type == 'detailed' \
                                or first_request.tca_type == 'compliance' \
                                or first_request.summary_display == 'candlestick'\
                                or not(point_in_time_executions_only):

                        if 'celery' in parallel_library:
                            # Load all the data for this ticker and THEN calculate the metrics on it
                            result.append(
                                chord(
                                    (get_market_trade_holder_via_celery.s(
                                        tca_request_data) for tca_request_data
                                     in tca_request_date_split),
                                    calculate_metrics_single_ticker_via_celery.
                                    s(tca_request_single_ticker,
                                      dummy_market)).apply_async())
                        elif parallel_library == 'single':
                            # This is not actually parallel, but is mainly for debugging purposes
                            for tca_request_s in tca_request_date_split:

                                market_df, trade_order_df_dict = tca_ticker_loader.get_market_trade_order_holder(
                                    tca_request_s, return_cache_handles=False)

                                market_df, trade_order_df_list, ticker, trade_order_keys = \
                                    tca_ticker_loader.calculate_metrics_single_ticker((market_df, trade_order_df_dict),
                                                                                        tca_request_s, dummy_market)

                                market_holder_list.add_dataframe(
                                    market_df, ticker)

                                trade_order_holder_list.add_dataframe_dict(
                                    dict(
                                        zip(trade_order_keys,
                                            trade_order_df_list)))

                    else:
                        # Otherwise work on parallel chunks by date
                        # doesn't currently work with orders which straddle day/week/month boundaries
                        # but should work with points in time
                        #
                        # In practice, it's not really much faster than the above code
                        #
                        # NOTE(review): nothing is dispatched in this branch unless parallel_library is exactly
                        # 'celery' (eg. 'single' collects no data for the ticker here) - confirm this is intended
                        if 'celery' == parallel_library:

                            # For each ticker/date combination load data and process chunk (so can do fully in parallel)
                            result.append(
                                group(
                                    get_market_trade_holder_and_calculate_metrics_single_ticker_via_celery
                                    .s(tca_request_data, dummy_market)
                                    for tca_request_data in
                                    tca_request_date_split).apply_async())

                # Now combine the results from the parallel operations, if using celery
                if 'celery' in parallel_library:

                    # Careful, when the output is empty!
                    output = [
                        p.get(timeout=constants.celery_timeout_seconds)
                        for p in result if p is not None
                    ]

                    # If pipelined/splice_request_by_dates will have two lists so flatten it into one
                    output = self._util_func.flatten_list_of_lists(output)

                    for market_df, trade_order_df_list, ticker, trade_order_keys in output:
                        market_holder_list.add_dataframe(market_df, ticker)

                        trade_order_holder_list.add_dataframe_dict(
                            dict(zip(trade_order_keys, trade_order_df_list)))

                    # Drop the references to the raw results now they have been copied into the holders
                    del result
                    del output

            # Problems with the request or the underlying data will not be fixed by retrying, so propagate them
            except (DateException, TradeMarketNonOverlapException,
                    DataMissingException, ErrorWritingOverlapDataException):
                raise

            # Exception likely related to Celery and possibly lack of communication with Redis message broker
            # or Memcached results backend
            except Exception as e:
                if attempt == no_of_tries:
                    err_msg = "Failed with " + parallel_library + " after multiple attempts: " + str(
                        e) + ", " + str(traceback.format_exc())

                    raise Exception(err_msg)

                logger.warning("Failed with " + parallel_library +
                               ", trying again for " + str(attempt) + " time: " +
                               str(e) + ", " + str(traceback.format_exc()))

            else:
                # The attempt succeeded, so no need to retry
                break

        logger.debug("Finished parallel computation")

        # Expand out the DataFrame holders into dictionaries of DataFrames
        market_df_dict = market_holder_list.get_combined_dataframe_dict()
        trade_order_results_df_dict = trade_order_holder_list.get_combined_dataframe_dict(
            start_date=start_date, finish_date=finish_date)

        # TODO add candlestick drawing here for cases when using split threading by date
        trade_order_results_df_dict = self._util_func.remove_keymatch_dict(
            trade_order_results_df_dict, 'market_df_downsampled')

        return market_df_dict, trade_order_results_df_dict