def test_stress_tca(fill_market_trade_databases):
    """Makes several large TCARequests at the same time to stress test tcapy application and also to check it works
    with parallel requests (note: you may need to reduce the length of the dataset if your machine has limited amounts of RAM).

    It can be possible that when deployed on the web, several users might make simultaneous requests. Note, do not use
    pylibmc, and instead use python-memcached, when using memcached as a result backend. pylibmc is not thread-safe so
    will come undone if you end up making parallel requests.
    """
    from tcapy.util.swim import Swim

    if not stress_test:
        return

    # Clear cache to ensure all test code runs!
    Mediator.get_volatile_cache().clear_cache()

    tca_request = TCARequest(
        start_date=start_date,
        finish_date=finish_date,
        ticker=valid_ticker_list,
        dummy_market=True,
        trade_data_store=trade_data_store,
        trade_data_database_name=trade_data_database_name,
        market_data_store=market_data_store,
        market_data_database_table=market_data_database_table,
        trade_order_mapping=trade_order_mapping,
        use_multithreading=True,
        tca_type='aggregated')

    # Kick off several simultaneous large TCA requests
    request_no = 2

    tca_request_list = []

    for _ in range(request_no):
        tca_request_list.append(TCARequest(tca_request=tca_request))

    tca_engine = TCAEngineImpl(version=tcapy_version)

    swim = Swim(parallel_library='thread')
    pool = swim.create_pool(thread_no=len(tca_request_list))

    result = []

    for item in tca_request_list:
        result.append(pool.apply_async(tca_engine.calculate_tca,
                                       args=(item, )))

    output = [p.get() for p in result]

    swim.close_pool(pool, True)

    assert len(output) == len(tca_request_list)

    # Check that several DataFrames exist in the results
    for trade_order_results_df_dict in output:
        assert 'trade_df' in trade_order_results_df_dict
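
# The docstring above advises python-memcached over pylibmc when memcached is used as
# the Celery result backend. Below is a minimal, hypothetical sketch of such a
# configuration; the app name and broker URL are illustrative assumptions, not tcapy's
# actual settings.
def _make_memcached_result_backend_app():
    from celery import Celery

    app = Celery('tcapy_tasks', broker='amqp://localhost')

    # The 'cache+memcached://' backend uses whichever memcached client is installed,
    # so install python-memcached (and avoid pylibmc, which is not thread-safe)
    app.conf.result_backend = 'cache+memcached://127.0.0.1:11211/'

    return app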

# Example #2
    def create_computation_request(self, **kwargs):
        """Creates a TCARequest object, populating its' fields with those from a kwargs dictionary, which consisted of
        parameters such as the start date, finish date, ticker, metrics to be computed, benchmark to be computed etd.

        The TCARequest object can later be consumed by a TCAEngine when it runs a TCA analysis.

        Parameters
        ----------
        kwargs : dict
            For describing a TCA analysis, such as the start date, finish date, ticker etc.

        Returns
        -------
        TCARequest
        """

        if 'tca_request' in kwargs:
            return kwargs['tca_request']

        # Convert various string/objects into forms which can be accepted by TCARequest
        kwargs = self.fill_computation_request_kwargs(kwargs, [
            'trade_order_mapping', 'trade_order_filter', 'benchmark_calcs',
            'metric_calcs', 'join_tables', 'filter_time_of_day', 'broker',
            'algo', 'dummy_market'
        ])

        # Create a TCARequest object which can be consumed by TCAEngine, to run a TCA calculation
        return TCARequest(start_date=kwargs['start_date'],
                          finish_date=kwargs['finish_date'],
                          ticker=kwargs['ticker'],
                          venue=kwargs['venue'],
                          event_type=kwargs['event_type'],
                          market_data_store=kwargs['market_data'],
                          tca_type=kwargs['tca_type'],
                          reload=kwargs['reload'],
                          trade_order_mapping=kwargs['trade_order_mapping'],
                          trade_order_filter=kwargs['trade_order_filter'],
                          metric_calcs=kwargs['metric_calcs'],
                          benchmark_calcs=kwargs['benchmark_calcs'],
                          join_tables=kwargs['join_tables'],
                          results_form=kwargs['results_form'],
                          dummy_market=kwargs['dummy_market'])
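
    # A hypothetical usage sketch for create_computation_request: the keyword arguments
    # mirror the TCARequest fields populated above. The class instance and every field
    # value here are illustrative assumptions, not tcapy defaults.
    #
    #     creator = ComputationRequestCreator()
    #     tca_request = creator.create_computation_request(
    #         start_date='01 Jan 2020', finish_date='01 Feb 2020', ticker='EURUSD',
    #         venue='All', event_type='trade', market_data='arctic-ncfx',
    #         tca_type='aggregated', reload=False, trade_order_mapping=None,
    #         trade_order_filter=None, metric_calcs=None, benchmark_calcs=None,
    #         join_tables=None, results_form=None, dummy_market=False)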

# Example #3
def test_invalid_dates_missing_data_tca():
    """Tests if the trade/order and market data is identical for multithreading versus singlethreading for detailed,
    aggregated and compliance. Note that we need a running Celery server for multithreading to work (as well as the
    usual SQL and Arctic databases running, if the test_csv option has not been selected). Uses a very large data sample
    """
    Mediator.get_volatile_cache().clear_cache()  # Clear cache to ensure all test code runs!

    tca_request = TCARequest(start_date=large_start_date,
                             finish_date=large_finish_date,
                             ticker=valid_ticker_list,
                             trade_data_store=trade_data_store,
                             market_data_store=market_data_store,
                             trade_order_mapping=trade_order_mapping)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    tca_type = ['detailed', 'aggregated', 'compliance']

    multithreading = [True, False]

    ## test invalid dates
    tca_request.start_date = invalid_start_date
    tca_request.finish_date = invalid_finish_date

    for t in tca_type:
        for m in multithreading:
            Mediator.get_volatile_cache().clear_cache()  # Clear cache to ensure all test code runs!
            tca_request.use_multithreading = m
            tca_request.tca_type = t

            exception_triggered = False

            try:
                dict_of_df_invalid = tca_engine.calculate_tca(
                    tca_request=tca_request)
            except DataMissingException:
                exception_triggered = True

            # Invalid dates should always trigger a DataMissingException
            assert exception_triggered

    ## Test a single valid ticker, but with missing data (only one ticker)
    tca_request.start_date = start_date
    tca_request.finish_date = finish_date
    tca_request.ticker = missing_ticker

    for t in tca_type:
        for m in multithreading:
            Mediator.get_volatile_cache(version=tcapy_version).clear_cache()  # Clear cache to ensure all test code runs!
            tca_request.use_multithreading = m
            tca_request.tca_type = t

            exception_triggered = False

            try:
                dict_of_df_missing_ticker = tca_engine.calculate_tca(
                    tca_request=tca_request)
            except DataMissingException:
                exception_triggered = True

            # A ticker with missing data should always trigger a DataMissingException
            assert exception_triggered
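
# A sketch of a more idiomatic equivalent of the flag-based pattern used above,
# assuming pytest is available in this suite: pytest.raises fails the test whenever
# DataMissingException is not raised.
def _assert_tca_raises_data_missing(tca_engine, tca_request):
    import pytest

    # calculate_tca should raise for invalid dates or for a ticker with no data
    with pytest.raises(DataMissingException):
        tca_engine.calculate_tca(tca_request=tca_request)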

# Example #4
def test_multithreading_full_basic_tca():
    """Tests if the trade/order and market data is identical for multithreading versus singlethreading for detailed,
    aggregated and compliance. Note that we need a running Celery server for multithreading to work (as well as the
    usual SQL and Arctic databases running, if the test_csv option has not been selected). Uses a very large data sample
    """
    Mediator.get_volatile_cache().clear_cache()  # Clear cache to ensure all test code runs!

    tca_request = TCARequest(start_date=multithreading_start_date,
                             finish_date=multithreading_finish_date,
                             ticker=valid_ticker_list,
                             trade_data_store=trade_data_store,
                             market_data_store=market_data_store,
                             trade_order_mapping=trade_order_mapping)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    tca_type = ['aggregated', 'detailed', 'compliance']

    multithreading = [True, False]

    #### Check that the executed prices match in the single and multithreaded cases
    for t in tca_type:
        dict_list = []

        for m in multithreading:
            Mediator.get_volatile_cache(version=tcapy_version).clear_cache()  # Clear cache to ensure all test code runs!

            tca_request.use_multithreading = m
            tca_request.tca_type = t
            dict_list.append(tca_engine.calculate_tca(tca_request=tca_request))

        print("tca_request " + t)

        for k in dict_list[0]:
            multi_df = dict_list[0][k]
            single_df = dict_list[1][k]

            if isinstance(single_df, pd.DataFrame) and isinstance(
                    multi_df, pd.DataFrame):
                if 'executed_price' in single_df.columns and 'executed_price' in multi_df.columns:

                    print("tablecomputation " + k)

                    exec_multi = multi_df['executed_price'].dropna()
                    exec_single = single_df['executed_price'].dropna()

                    if dump_csv_output:
                        df = pd.DataFrame(exec_multi)

                        df1 = pd.DataFrame(exec_single)
                        df1.columns = [x + '_single' for x in df1.columns]

                        df = df.join(df1, how='outer')

                        df.to_csv(k + "_test.csv")

                        df_large = single_df.join(multi_df,
                                                  lsuffix='_single',
                                                  rsuffix='_multi',
                                                  how='outer')
                        df_large.to_csv(k + "_test_full.csv")

                    assert all((exec_multi - exec_single).abs() < eps)

                    # Only check trade/orders and not any of the other DataFrames returned
                    if 'id' in multi_df.columns and 'id' in single_df.columns:
                        # Check we have unique IDs (we should have unique IDs for every event_type trade, but the IDs
                        # will be the same for placements)
                        id_multi = multi_df['id']
                        id_single = single_df['id']

                        assert len(id_multi.index) == len(id_single.index)

                        assert len(id_multi.unique()) == len(id_single.index)
                        assert len(id_multi.index) == len(id_single.unique())
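
# The elementwise executed price comparison above can also be written with pandas' own
# testing helpers; a sketch, assuming pandas >= 1.1 and that both series end up with
# the same index, with eps reused as an absolute tolerance.
def _assert_executed_prices_match(single_df, multi_df, eps):
    import pandas.testing as pdt

    # check_exact=False with atol tolerates small floating-point differences between
    # the singlethreaded and multithreaded runs
    pdt.assert_series_equal(single_df['executed_price'].dropna(),
                            multi_df['executed_price'].dropna(),
                            check_exact=False, atol=eps)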

# Example #5

if __name__ == '__main__':

    # Need this on Windows machines, to ensure multiprocessing works properly
    from tcapy.util.swim import Swim

    Swim()

    from tcapy.data.volatilecache import VolatileRedis

    import datetime
    from datetime import timedelta

    # First delete the Redis cache
    volatile = VolatileRedis()
    volatile.clear_cache()

    from tcapy.analysis.tcaengine import TCARequest, TCAEngineImpl

    tca_engine = TCAEngineImpl()

    # Do a massive TCA computation for all currency pairs for the past year;
    # this will cache all the data in Redis, which can be used later
    finish_date = datetime.datetime.utcnow().date() - timedelta(days=1)
    start_date = finish_date - timedelta(days=252)

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker='All')
    tca_engine.calculate_tca(tca_request)
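
    # Later overlapping requests should then be served largely from the Redis cache;
    # a hypothetical follow-up request (the ticker and window are illustrative)
    cached_request = TCARequest(start_date=finish_date - timedelta(days=30),
                                finish_date=finish_date, ticker='EURUSD')
    tca_engine.calculate_tca(cached_request)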