Example #1
def test_data_offset():
    """Tests the offsetting of market and trade data by milliseconds by user. This might be useful if clocks are slightly
    offset when recording market or trade data
    """
    Mediator.get_volatile_cache().clear_cache()

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             trade_data_store=trade_data_store,
                             market_data_store=market_data_store,
                             trade_order_mapping=trade_order_mapping)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    dict_of_df = tca_engine.calculate_tca(tca_request=tca_request)

    # Now offset both the trade and market data
    tca_request.trade_data_offset_ms = 1
    tca_request.market_data_offset_ms = -1

    dict_of_df_offset = tca_engine.calculate_tca(tca_request=tca_request)

    trade_df = dict_of_df[trade_df_name]
    market_df = dict_of_df['market_df']
    trade_df_offset = dict_of_df_offset[trade_df_name]
    market_df_offset = dict_of_df_offset['market_df']

    assert all(market_df.index + timedelta(milliseconds=-1) == market_df_offset.index)
    assert all(trade_df.index + timedelta(milliseconds=1) == trade_df_offset.index)

    for c in constants.date_columns:
        if c in trade_df.columns:
            assert all(trade_df[c] + timedelta(milliseconds=1) == trade_df_offset[c])
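
The invariant being asserted reduces to simple index arithmetic. A minimal standalone sketch, using only pandas:

import pandas as pd

idx = pd.date_range('2017-05-01', periods=3, freq='s')
offset_idx = idx + pd.Timedelta(milliseconds=1)

# Every timestamp should be shifted by exactly 1ms
assert all(idx + pd.Timedelta(milliseconds=1) == offset_idx)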
Example #2
def fill_market_trade_databases():
    """Fills market and trade data with test data
    """
    Mediator.get_volatile_cache().clear_cache()

    replace_append = 'replace'

    # Fill market data (assume: CHUNK_STORE as our default format!)
    for ticker in ticker_arctic:
        database_source = DatabaseSourceArctic(postfix='testharness',
                                               arctic_lib_type=arctic_lib_type)

        # Write CSV to Arctic
        database_source.convert_csv_to_table(csv_market_data_store,
                                             ticker,
                                             market_data_table,
                                             if_exists_table=replace_append,
                                             if_exists_ticker='replace',
                                             market_trade_data='market',
                                             remove_duplicates=False)

        replace_append = 'append'

    # Fill trade/order data
    database_source = DatabaseSourceMySQL()

    for t in trade_order_list:
        # Dump trade_df to SQL test harness database and overwrite
        database_source.convert_csv_to_table(
            csv_trade_order_mapping[t],
            None, (trade_order_mapping[trade_data_store])[t],
            database_name=trade_data_database_name,
            if_exists_table='replace',
            market_trade_data='trade')
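
The `replace_append` flip above is a common bulk-load pattern: only the first write recreates the table, and every later write appends. A minimal sketch, with a hypothetical `write_ticker` callable standing in for `convert_csv_to_table`:

def bulk_load(tickers, write_ticker):
    mode = 'replace'

    for ticker in tickers:
        write_ticker(ticker, if_exists_table=mode)
        mode = 'append'  # only the first write wipes the table

bulk_load(['EURUSD', 'GBPUSD'], lambda ticker, if_exists_table: None)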
Example #3
def test_market_data_convention(fill_market_trade_databases):
    """Tests that market data for unusual quotations is consistent (ie. if the user requests USDEUR, this should be
    inverted EURUSD (which is the correct convention)
    """
    Mediator.get_volatile_cache().clear_cache()

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)
    market_request = MarketRequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                   data_store=market_data_store, market_data_database_table=market_data_database_table)

    #### Compare EURUSD to USDEUR
    market_correct_conv_series = pd.DataFrame(market_loader.get_market_data(market_request)['mid'])

    market_request.ticker = 'USDEUR'
    market_reverse_conv_series = pd.DataFrame(1.0 / market_loader.get_market_data(market_request)['mid'])

    assert_frame_equal(market_correct_conv_series, market_reverse_conv_series, check_dtype=False)

    #### Compare EURJPY (which is auto-generated if EURJPY is not collected directly) vs. EURUSD multiplied by USDJPY

    # Use resampled series for comparison
    market_request.ticker = 'USDJPY'
    market_df_USDJPY = pd.DataFrame(market_loader.get_market_data(market_request)['mid'])

    market_request.ticker = 'EURJPY'
    market_df_EURJPY = pd.DataFrame(market_loader.get_market_data(market_request)['mid']).resample('1min').mean()

    market_df_EURJPY_comp = (market_correct_conv_series.resample('1min').mean() * market_df_USDJPY.resample('1min').mean())

    market_df_EURJPY, market_df_EURJPY_comp = market_df_EURJPY.align(market_df_EURJPY_comp, join='inner')

    comp = (market_df_EURJPY - market_df_EURJPY_comp).dropna()

    assert (comp.abs() < eps).all().all()
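
Both consistency checks reduce to arithmetic on mid prices: inversion for against-convention requests, and multiplication of the USD legs for an auto-generated cross. A toy sketch:

import pandas as pd

eurusd = pd.Series([1.10, 1.11, 1.12])
usdjpy = pd.Series([110.0, 111.0, 112.0])

usdeur = 1.0 / eurusd     # against-convention quotation, derived by inversion
eurjpy = eurusd * usdjpy  # synthetic EURJPY cross built from the two USD legs

assert ((eurusd - 1.0 / usdeur).abs() < 1e-12).all()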
Example #4
def test_executed_price_notional_calculation(fill_market_trade_databases):
    """Test that the executed average price calculation from trades is correctly reflected in the order level
    """
    Mediator.get_volatile_cache().clear_cache()

    market_df, trade_df, order_df = get_sample_data()

    # Get the first and last points given boundary cases (and a few other random orders) to check
    index_boundary = np.random.randint(0, len(order_df.index) - 1, 100)
    index_boundary = index_boundary.tolist()
    index_boundary.append(0)
    index_boundary.append(-1)

    for i in index_boundary:
        if order_df.iloc[i]['notional'] > 1:
            executed_price = order_df.iloc[i]['executed_price']
            id = order_df.iloc[i]['id']

            executed_price_trade = trade_df[trade_df['ancestor_pointer_id'] ==
                                            id]['executed_price'].fillna(0)
            executed_notional_trade = trade_df[
                trade_df['ancestor_pointer_id'] ==
                id]['executed_notional'].fillna(0)

            executed_avg_trade = (
                (executed_price_trade * executed_notional_trade).sum() /
                executed_notional_trade.sum())

            assert abs(executed_price - executed_avg_trade) < eps
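
The quantity being compared is a notional-weighted average. In isolation:

import pandas as pd

executed_price = pd.Series([1.1000, 1.1002, 1.1004])
executed_notional = pd.Series([1e6, 2e6, 1e6])

# The order's executed price should equal the notional-weighted average of its trades
order_executed_price = (executed_price * executed_notional).sum() / executed_notional.sum()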
Example #5
def test_create_tca_report(fill_market_trade_databases):
    """Tests the creation of a TCAResults, checking they are fichecking it generates the right document
    """
    Mediator.get_volatile_cache().clear_cache()

    tca_request = TCARequest(
        start_date=start_date,
        finish_date=finish_date,
        ticker=ticker,
        trade_data_store=trade_data_store,
        trade_data_database_name=trade_data_database_name,
        market_data_store=market_data_store,
        market_data_database_table=market_data_database_table,
        trade_order_mapping=trade_order_mapping,
        metric_calcs=MetricSlippage(),
        results_form=TimelineResultsForm(metric_name='slippage',
                                         by_date='datehour'),
        use_multithreading=use_multithreading)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    tca_results = TCAResults(tca_engine.calculate_tca(tca_request=tca_request),
                             tca_request)
    tca_results.render_computation_charts()

    assert tca_results.timeline is not None and tca_results.timeline_charts is not None

    tca_report = TCAReport(tca_results)
    html = tca_report.create_report()

    # Quick check that the HTML has been generated, by checking the existence of the HTML head tag
    assert '<head>' in html
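
If you also want to persist the generated document, a minimal follow-on (assuming `html` is the string returned by `create_report()` above) might be:

with open('tca_report.html', 'w') as f:
    f.write(html)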
Example #6
def test_stress_tca(fill_market_trade_databases):
    """Makes several large TCARequests at the same time to stress test tcapy application and also to check it works
    with parallel requests (note: you may need to reduce the length of the dataset if your machine has limited amounts of RAM).

    It can be possible that when deployed on the web, several users might make simultaneous requests. Note, do not use
    pylibmc, and instead use python-memcached, when using memcached as a result backend. pylibmc is not thread-safe so
    will come undone if you end up making parallel requests.
    """
    from tcapy.util.swim import Swim

    if not stress_test:
        return

    # Clear cache to ensure all test code runs!
    Mediator.get_volatile_cache().clear_cache()

    tca_request = TCARequest(
        start_date=start_date,
        finish_date=finish_date,
        ticker=valid_ticker_list,
        dummy_market=True,
        trade_data_store=trade_data_store,
        trade_data_database_name=trade_data_database_name,
        market_data_store=market_data_store,
        market_data_database_table=market_data_database_table,
        trade_order_mapping=trade_order_mapping,
        use_multithreading=True,
        tca_type='aggregated')

    # Kick off several simultaneous large TCA requests
    request_no = 2

    tca_request_list = []

    for i in range(0, request_no):
        tca_request_list.append(TCARequest(tca_request=tca_request))

    tca_engine = TCAEngineImpl(version=tcapy_version)

    swim = Swim(parallel_library='thread')
    pool = swim.create_pool(thread_no=len(tca_request_list))

    result = []

    for item in tca_request_list:
        result.append(pool.apply_async(tca_engine.calculate_tca,
                                       args=(item, )))

    output = [p.get() for p in result]

    swim.close_pool(pool, True)

    assert len(output) == len(tca_request_list)

    # Check that several DataFrames exist in the results
    for trade_order_results_df_dict in output:
        assert 'trade_df' in trade_order_results_df_dict
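
Swim appears to wrap a thread pool here; the same fan-out/gather pattern can be sketched with the standard library alone (an illustration, not tcapy's implementation):

from multiprocessing.pool import ThreadPool

def work(x):
    return x * x

pool = ThreadPool(4)

# Fan out the requests asynchronously, then gather the results
async_results = [pool.apply_async(work, args=(i,)) for i in range(4)]
output = [r.get() for r in async_results]

pool.close()
pool.join()

assert output == [0, 1, 4, 9]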
Example #7
def test_invalid_dates_missing_data_tca(fill_market_trade_databases):
    """Tests if the trade/order and market data is identical for use_multithreading versus singlethreading for detailed,
    aggregated and compliance. Note that we need a running Celery server for use_multithreading to work (as well as the
    usual SQL and Arctic databases running, if the test_csv option has not been selected). Uses a very large data sample
    """
    Mediator.get_volatile_cache().clear_cache()  # Clear cache to ensure all test code runs!

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=valid_ticker_list,
                             trade_data_store=trade_data_store,
                             trade_data_database_name=trade_data_database_name,
                             market_data_store=market_data_store,
                             market_data_database_table=market_data_database_table,
                             trade_order_mapping=trade_order_mapping)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    ## Test invalid dates
    tca_request.start_date = invalid_start_date
    tca_request.finish_date = invalid_finish_date

    for t in tca_type:
        for m in use_multithreading:
            tca_request.use_multithreading = m
            tca_request.tca_type = t

            exception_triggered = False

            try:
                dict_of_df_invalid = tca_engine.calculate_tca(tca_request=tca_request)
            except DataMissingException:
                exception_triggered = True

            assert exception_triggered

    ## Test a single valid ticker, but missing data (only one ticker)
    tca_request.start_date = start_date
    tca_request.finish_date = finish_date
    tca_request.ticker = missing_ticker

    for t in tca_type:
        for m in use_multithreading:
            Mediator.get_volatile_cache().clear_cache()  # Clear cache to ensure all test code runs!
            tca_request.use_multithreading = m
            tca_request.tca_type = t

            exception_triggered = False

            try:
                dict_of_df_missing_ticker = tca_engine.calculate_tca(tca_request=tca_request)
            except DataMissingException:
                exception_triggered = True

            assert exception_triggered
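
If the suite runs under pytest (an assumption), the exception-flag pattern above can be written more directly with pytest.raises:

import pytest

def loader():
    # Stand-in for calculate_tca with invalid dates or a missing ticker
    raise ValueError('no data for requested dates')

def test_loader_raises():
    with pytest.raises(ValueError):
        loader()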
Example #8
    def _convert_tuple_to_market_trade(self, market_trade_order_tuple):
        volatile_cache = Mediator.get_volatile_cache(volatile_cache_engine=self._volatile_cache_engine)

        # Gather market and trade/order data (which might be stored in a list)
        if isinstance(market_trade_order_tuple, list):
            market_df_list = []
            trade_order_holder = DataFrameHolder()

            for market_df_single, trade_order_holder_single in market_trade_order_tuple:
                market_df_list.append(market_df_single)

                trade_order_holder.add_dataframe_holder(trade_order_holder_single)

            market_df_list = volatile_cache.get_dataframe_handle(market_df_list, burn_after_reading=True)

            # Ensure that any spurious/None elements are removed
            market_df_list = [x for x in market_df_list if isinstance(x, pd.DataFrame)]

            # Make sure the data is properly ordered too (it's not guaranteed we'll get it back in the right order)
            market_df = self._time_series_ops.concat_dataframe_list(market_df_list)

        else:
            market_df = volatile_cache.get_dataframe_handle(market_trade_order_tuple[0], burn_after_reading=True)
            trade_order_holder = market_trade_order_tuple[1]

        return market_df, trade_order_holder
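
The filtering and reassembly above boils down to this pattern (plain `pd.concat` stands in for `concat_dataframe_list`, which we assume behaves similarly):

import pandas as pd

df_list = [
    pd.DataFrame({'mid': [1.2]}, index=pd.to_datetime(['2017-05-02'])),
    None,  # spurious element, eg. from a worker that returned no data
    pd.DataFrame({'mid': [1.1]}, index=pd.to_datetime(['2017-05-01'])),
]

# Drop non-DataFrame elements, then concatenate and restore time ordering
df_list = [x for x in df_list if isinstance(x, pd.DataFrame)]
market_df = pd.concat(df_list).sort_index()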
Example #9
    def _get_correct_convention_market_data(self, market_request, start_date=None, finish_date=None):
        # Check that cross is in correct convention
        if self._fx_conv.correct_notation(market_request.ticker) != market_request.ticker:
            raise Exception('Method expecting only crosses in correct market convention')

        cache = True

        if isinstance(market_request.data_store, pd.DataFrame):
            cache = False

        if market_request.multithreading_params['cache_period_market_data'] and cache:
            volatile_cache = Mediator.get_volatile_cache(volatile_cache_engine=self._volatile_cache_engine)

            start_date, finish_date, market_key, market_df = \
                volatile_cache.get_data_request_cache(market_request, market_request.data_store, 'market_df',
                                                      market_request.data_offset_ms)

            if market_df is None:
                market_df = super(TCATickerLoaderImpl, self)._get_underlying_market_data(start_date, finish_date, market_request)

                volatile_cache.put_data_request_cache(market_request, market_key, market_df)

            return self._strip_start_finish_dataframe(market_df, start_date, finish_date, market_request)
        else:
            if start_date is None or finish_date is None:
                start_date = market_request.start_date
                finish_date = market_request.finish_date

            return super(TCATickerLoaderImpl, self)._get_underlying_market_data(start_date, finish_date,
                                                                                     market_request)
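
Stripped of the tcapy specifics, the cached branch is a cache-aside lookup: try the cache, and on a miss load from the underlying source and populate the cache. A generic sketch with hypothetical names:

_cache = {}  # a plain dict stands in for the volatile cache

def get_cached(key, loader):
    df = _cache.get(key)

    if df is None:
        df = loader()     # expensive load only on a cache miss
        _cache[key] = df  # populate for subsequent requests

    return df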
Example #10
    def get_market_data(self, market_request, return_cache_handles=False):
        # return_cache_handles returns a pointer (a CacheHandle) rather than the underlying DataFrame

        volatile_cache = Mediator.get_volatile_cache(volatile_cache_engine=self._volatile_cache_engine)

        cache = True

        # Don't attempt to cache DataFrames
        if hasattr(market_request, 'market_data_store'):
            if (isinstance(market_request.market_data_store, pd.DataFrame)):
                cache = False
        elif isinstance(market_request.data_store, pd.DataFrame):
            cache = False

        # If we have allowed the caching of monthly/periodic market data
        if market_request.multithreading_params['cache_period_market_data'] and cache:
            old_start_date = market_request.start_date
            old_finish_date = market_request.finish_date

            # So we can also take TCARequest objects
            if hasattr(market_request, 'market_data_store'):
                data_store = market_request.market_data_store
                data_offset_ms = market_request.market_data_offset_ms
            else:
                data_store = market_request.data_store
                data_offset_ms = market_request.data_offset_ms

            # See if we can fetch from the cache (typically Redis)
            start_date, finish_date, market_key, market_df = \
                volatile_cache.get_data_request_cache(market_request, data_store, 'market_df',
                                                     data_offset_ms)

            # If data is already cached, just return the existing CacheHandle (which is like a pointer to the reference
            # in Redis)
            if market_df is not None and start_date == old_start_date and finish_date == old_finish_date and return_cache_handles:
                return CacheHandle(market_key, add_time_expiry=False)

            if market_df is None:

                market_request_copy = MarketRequest(market_request=market_request)
                market_request_copy.start_date = start_date
                market_request_copy.finish_date = finish_date

                market_df = super(TCATickerLoaderImpl, self).get_market_data(market_request_copy)

                volatile_cache.put_data_request_cache(market_request_copy, market_key, market_df)

            market_df = self._strip_start_finish_dataframe(market_df, old_start_date, old_finish_date, market_request)
        else:
            market_df = super(TCATickerLoaderImpl, self).get_market_data(market_request)

        # Return as a cache handle (which can be easily passed across Celery for example)
        # Only if use_multithreading
        if return_cache_handles and market_request.use_multithreading:
            return volatile_cache.put_dataframe_handle(market_df,
                use_cache_handles=market_request.multithreading_params['cache_period_market_data'])

        return market_df
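
Because the cache stores whole periods (eg. full months), the originally requested window has to be cut back out at the end; we assume `_strip_start_finish_dataframe` amounts to a date-mask trim like this:

import pandas as pd

df = pd.DataFrame({'mid': range(10)},
                  index=pd.date_range('2017-05-01', periods=10, freq='D'))

# Trim the full cached period back to the requested window
start, finish = pd.Timestamp('2017-05-03'), pd.Timestamp('2017-05-06')
trimmed = df[(df.index >= start) & (df.index <= finish)]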
Example #11
def test_results_form_average(fill_market_trade_databases):
    """Tests averages are calculated correctly by ResultsForm, compared to a direct calculation
    """
    Mediator.get_volatile_cache().clear_cache()

    market_df, trade_df, order_df = get_sample_data()

    trade_df, _ = MetricSlippage().calculate_metric(trade_order_df=trade_df,
                                                    market_df=market_df,
                                                    bid_benchmark='mid',
                                                    ask_benchmark='mid')

    results_form = BarResultsForm(
        market_trade_order_list=['trade_df'],
        metric_name='slippage',
        aggregation_metric='mean',
        aggregate_by_field=['ticker', 'venue'],
        scalar=10000.0,
        weighting_field='executed_notional_in_reporting_currency')

    results_df = results_form.aggregate_results(
        market_trade_order_df=trade_df,
        market_df=market_df,
        market_trade_order_name='trade_df')

    slippage_average = float(results_df[0][0].values[0])

    # Directly calculate slippage
    def grab_slippage(trade_df):
        return 10000.0 * ((trade_df['slippage'] * trade_df['executed_notional_in_reporting_currency']).sum() \
                   / trade_df['executed_notional_in_reporting_currency'].sum())

    slippage_average_comp = grab_slippage(trade_df)

    # Check the average slippage
    assert abs(slippage_average - slippage_average_comp) < eps

    slippage_average_venue = results_df[1][0]['venue'][venue_filter]

    slippage_average_venue_comp = grab_slippage(
        trade_df[trade_df['venue'] == venue_filter])

    # Check the average slippage by venue
    assert abs(slippage_average_venue - slippage_average_venue_comp) < eps
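
The direct calculation generalizes to a notional-weighted mean per group, which is what the ResultsForm aggregation is being checked against. In plain pandas:

import pandas as pd

trade_df = pd.DataFrame({
    'venue': ['ebs', 'ebs', 'reuters'],
    'slippage': [-1e-4, -3e-4, -2e-4],
    'executed_notional_in_reporting_currency': [1e6, 3e6, 2e6]})

def weighted_slippage(g):
    w = g['executed_notional_in_reporting_currency']

    return 10000.0 * (g['slippage'] * w).sum() / w.sum()

# Notional-weighted mean slippage (same 10000x scaling) by venue
by_venue = trade_df.groupby('venue').apply(weighted_slippage)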
Example #12
def test_fetch_market_data_db():
    """Tests that we can fetch data from Arctic/KDB/InfluxDB. Note you need to populate the database first before running this for
    the desired dates.
    """
    market_loader = Mediator.get_tca_market_trade_loader()

    market_data_store_list, market_data_database_table_list = _get_db_market_data_store()

    for market_data_store, market_data_database_table in zip(
            market_data_store_list, market_data_database_table_list):
        market_request = MarketRequest(
            start_date=start_date,
            finish_date=finish_date,
            ticker=ticker,
            data_store=market_data_store,
            market_data_database_table=market_data_database_table)

        market_df = market_loader.get_market_data(market_request)

        try:
            market_df = Mediator.get_volatile_cache().get_dataframe_handle(market_df)
        except Exception:
            pass

        assert not(market_df.empty) \
               and market_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and market_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

        market_request.start_date = invalid_start_date
        market_request.finish_date = invalid_finish_date

        market_empty_df = market_loader.get_market_data(market_request)

        try:
            market_empty_df = Mediator.get_volatile_cache().get_dataframe_handle(market_empty_df)
        except Exception:
            pass

        assert market_empty_df.empty
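
The boundary assertions reduce to a timezone-aware containment check on the index. In isolation:

import pandas as pd

start = pd.Timestamp('01 May 2017').tz_localize('utc')
finish = pd.Timestamp('30 May 2017').tz_localize('utc')

idx = pd.date_range(start, finish, freq='D')

# The fetched data should sit entirely inside the requested window
assert idx[0] >= start and idx[-1] <= finish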
Example #13
    def calculate_metrics_single_ticker(self, market_trade_order_combo,
                                        tca_request, dummy_market):
        volatile_cache = Mediator.get_volatile_cache(version=self._version)

        market_df, trade_order_df_values, ticker, trade_order_df_keys \
            = super(TCATickerLoaderImpl, self).calculate_metrics_single_ticker(market_trade_order_combo, tca_request, dummy_market)

        # Return as a cache handle (which can be easily passed across Celery for example) or not for the market
        # and trade/order data
        return volatile_cache.put_dataframe_handle(market_df, tca_request.multithreading_params['return_cache_handles_market_data']), \
                volatile_cache.put_dataframe_handle(trade_order_df_values, tca_request.multithreading_params['return_cache_handles_trade_data']), \
                ticker, trade_order_df_keys
Example #14
    def calculate_metrics_single_ticker(self, market_trade_order_combo, tca_request, dummy_market):

        volatile_cache = Mediator.get_volatile_cache(volatile_cache_engine=self._volatile_cache_engine)

        market_df, trade_order_df_values, ticker, trade_order_df_keys \
            = super(TCATickerLoaderImpl, self).calculate_metrics_single_ticker(market_trade_order_combo, tca_request, dummy_market)

        if tca_request.use_multithreading:
            # Return as a cache handle (which can be easily passed across Celery for example) or not for the market
            # and trade/order data
            return volatile_cache.put_dataframe_handle(market_df, tca_request.multithreading_params['return_cache_handles_market_data']), \
                    volatile_cache.put_dataframe_handle(trade_order_df_values, tca_request.multithreading_params['return_cache_handles_trade_data']), \
                    ticker, trade_order_df_keys
        else:
            # For single threading, don't use cache handles (no point, because sharing in the same memory space)
            return market_df, trade_order_df_values, ticker, trade_order_df_keys
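
The else-branch comment carries the design point: cache handles only pay off when results must cross process boundaries. A sketch of the decision with hypothetical names (`put_handle` stands in for `put_dataframe_handle`):

def maybe_handle(df, use_multithreading, put_handle):
    if use_multithreading:
        # Workers in other processes can only share data via the cache,
        # so hand back a lightweight key rather than the DataFrame itself
        return put_handle(df)

    # Same memory space: returning the DataFrame directly is cheaper
    return df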
Example #15
from tcapy.analysis.tradeorderfilter import *

from tcapy.vis.tcaresults import TCAResults
from tcapy.vis.report.tcareport import TCAReport

from test.config import *

constants = Constants()
logger = LoggerManager().getLogger(__name__)

from tcapy.util.mediator import Mediator

logger.info('Make sure you have created the folders ' + constants.csv_folder +
            ' & ' + constants.temp_data_folder + ', otherwise tests will fail')

Mediator.get_volatile_cache().clear_cache()

########################################################################################################################
# YOU MAY NEED TO CHANGE TESTING PARAMETERS IF YOUR DATABASE DOESN'T COVER THESE DATES
start_date = '01 May 2017'
finish_date = '30 May 2017'

filter_date = '03 May 2017'
start_filter_date = '00:00:00 03 May 2017'
finish_filter_date = '23:59:59 03 May 2017'

trade_data_store = 'mysql'
trade_data_database_name = 'trade_database_test_harness'

market_data_store = 'arctic-testharness'
market_data_database_table = 'market_data_table_test_harness'
Example #16
# See the License for the specific language governing permissions and limitations under the License.
#

import os
from collections import OrderedDict

import time

from tcapy.util.mediator import Mediator
from tcapy.conf.constants import Constants

constants = Constants()

folder = constants.test_data_harness_folder

volatile_cache = Mediator.get_volatile_cache()


def tca_example_csv_trade_data_dukascopy():
    """Loads up trade/order data from CSV files and market data externally from Dukascopy. Does not use any databases, if
    you rarely use TCA, this is fine. However, for heavy use of TCA, we strongly recommend maintaining an internal tick
    database, as external downloading of data can be very slow.

    In this case we are simply calculating the slippage of every trade and orders above them.
    """

    from tcapy.analysis.tcaengine import TCAEngineImpl
    from tcapy.analysis.tcarequest import TCARequest

    from tcapy.analysis.algos.benchmark import BenchmarkArrival, BenchmarkSpreadToMid
    from tcapy.analysis.algos.metric import MetricSlippage
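
    # A hedged sketch of how the body might continue, using only constructs that
    # appear elsewhere in these examples; 'dukascopy' as a market data store, the
    # CSV trade file path and the dates are assumptions for illustration
    csv_trade_data = os.path.join(folder, 'trade_df_dukascopy.csv')  # hypothetical file

    tca_request = TCARequest(start_date='05 Jan 2021', finish_date='06 Jan 2021', ticker='EURUSD',
                             trade_data_store='csv',
                             trade_order_mapping={'trade_df': csv_trade_data},
                             market_data_store='dukascopy',
                             benchmark_calcs=[BenchmarkArrival(), BenchmarkSpreadToMid()],
                             metric_calcs=MetricSlippage())

    tca_engine = TCAEngineImpl()

    dict_of_df = tca_engine.calculate_tca(tca_request=tca_request)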
Example #17
def test_multithreading_full_basic_tca():
    """Tests if the trade/order and market data is identical for multithreading versus singlethreading for detailed,
    aggregated and compliance. Note that we need a running Celery server for multithreading to work (as well as the
    usual SQL and Arctic databases running, if the test_csv option has not been selected). Uses a very large data sample
    """
    Mediator.get_volatile_cache().clear_cache()  # Clear cache to ensure all test code runs!

    tca_request = TCARequest(start_date=multithreading_start_date,
                             finish_date=multithreading_finish_date,
                             ticker=valid_ticker_list,
                             trade_data_store=trade_data_store,
                             market_data_store=market_data_store,
                             trade_order_mapping=trade_order_mapping)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    tca_type = ['aggregated', 'detailed', 'compliance']

    multithreading = [True, False]

    #### Check that the executed prices match in the single and multithreaded cases
    for t in tca_type:
        dict_list = []

        for m in multithreading:
            Mediator.get_volatile_cache(version=tcapy_version).clear_cache()  # Clear cache to ensure all test code runs!

            tca_request.use_multithreading = m
            tca_request.tca_type = t
            dict_list.append(tca_engine.calculate_tca(tca_request=tca_request))

        print("tca_request " + t)

        for k in dict_list[0].keys():
            multi_df = dict_list[0][k]
            single_df = dict_list[1][k]

            if isinstance(single_df, pd.DataFrame) and isinstance(
                    multi_df, pd.DataFrame):
                if 'executed_price' in single_df.columns and 'executed_price' in multi_df.columns:

                    print("tablecomputation " + k)

                    exec_multi = multi_df['executed_price'].dropna()
                    exec_single = single_df['executed_price'].dropna()

                    if dump_csv_output:
                        df = pd.DataFrame(exec_multi)

                        df1 = pd.DataFrame(exec_single)
                        df1.columns = [x + '_single' for x in df1.columns]

                        df = df.join(pd.DataFrame(df1), how='outer')

                        df.to_csv(k + "_test.csv")

                        df_large = single_df.join(multi_df,
                                                  lsuffix='_single',
                                                  rsuffix='_multi',
                                                  how='outer')
                        df_large.to_csv(k + "_test_full.csv")

                    assert all((exec_multi - exec_single).abs() < eps)

                    # Only check trade/orders and not any of the other DataFrames returned
                    if 'id' in multi_df.columns and 'id' in single_df.columns:
                        # Check we have unique IDs: there should be a unique ID for every trade event_type
                        # (but the IDs will be the same for placements)
                        id_multi = multi_df['id']
                        id_single = single_df['id']

                        assert len(id_multi.index) == len(id_single.index)

                        assert len(id_multi.unique()) == len(id_single.index)
                        assert len(id_multi.index) == len(id_single.unique())
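
The ID checks at the end encode a simple uniqueness invariant. In isolation:

import pandas as pd

ids = pd.Series(['trade_001', 'trade_002', 'trade_003'])

# No duplicates: the number of unique values matches the length of the index
assert len(ids.unique()) == len(ids.index)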
Example #18
def test_fetch_market_trade_data_csv():
    """Tests downloading of market and trade/order data from CSV files
    """

    ### Get market data
    market_loader = Mediator.get_tca_market_trade_loader()

    market_request = MarketRequest(start_date=start_date,
                                   finish_date=finish_date,
                                   ticker=ticker,
                                   data_store=csv_market_data_store)

    market_df = market_loader.get_market_data(market_request)

    assert not(market_df.empty) \
           and market_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
           and market_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

    # For a high-level trade data request, we need to use a TCARequest, because it usually involves some market data
    # download (we are assuming that the market data is being downloaded from our Arctic database),
    # e.g. for converting notionals to the reporting currency
    tca_request = TCARequest(start_date=start_date,
                             finish_date=finish_date,
                             ticker=ticker,
                             trade_data_store='csv',
                             market_data_store=arctic_market_data_store,
                             trade_order_mapping=csv_trade_order_mapping)

    for t in trade_order_list:
        trade_order_df = market_loader.get_trade_order_data(tca_request, t)

        try:
            trade_order_df = Mediator.get_volatile_cache().get_dataframe_handle(trade_order_df)
        except Exception:
            pass

        assert not trade_order_df.empty \
               and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

    ### Test using DataFactory and DatabaseSource
    from tcapy.data.datafactory import DataFactory

    data_factory = DataFactory()

    for t in trade_order_list:
        ### Test using DataFactory
        trade_request = TradeRequest(
            start_date=start_date,
            finish_date=finish_date,
            ticker=ticker,
            data_store='csv',
            trade_order_mapping=csv_trade_order_mapping,
            trade_order_type=t)

        trade_order_df = data_factory.fetch_table(trade_request)

        assert not trade_order_df.empty \
                          and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
                          and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

        ### Test using DatabaseSourceCSV
        from tcapy.data.databasesource import DatabaseSourceCSV

        database_source = DatabaseSourceCSV()

        trade_order_df = database_source.fetch_trade_order_data(
            start_date,
            finish_date,
            ticker,
            table_name=csv_trade_order_mapping[t])

        assert not trade_order_df.empty \
                             and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
                             and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')
Example #19
def test_fetch_trade_data_ms_sql_server():
    """Tests that we can fetch data from the Microsoft SQL Server database. Note you need to populate the database
    first before running this for the desired dates.
    """
    if not run_ms_sql_server_tests:
        return

    from tcapy.data.datafactory import DataFactory
    from tcapy.data.databasesource import DatabaseSourceMSSQLServer

    ### Test using TCAMarketTradeLoader
    market_loader = Mediator.get_tca_market_trade_loader()

    for t in trade_order_list:
        trade_order_mapping = {t: ms_sql_server_trade_order_mapping[t]}

        trade_request = TCARequest(
            start_date=start_date,
            finish_date=finish_date,
            ticker=ticker,
            trade_data_store=ms_sql_server_trade_data_store,
            trade_order_mapping=trade_order_mapping,
            market_data_store=arctic_market_data_store,
            use_multithreading=use_multithreading)

        trade_order_df = market_loader.get_trade_order_data(trade_request, t)

        try:
            trade_order_df = Mediator.get_volatile_cache().get_dataframe_handle(trade_order_df)
        except Exception:
            pass

        assert not trade_order_df.empty \
               and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

        ### Test using DataFactory
        data_factory = DataFactory()

        trade_request = TradeRequest(start_date=start_date,
                                     finish_date=finish_date,
                                     ticker=ticker,
                                     data_store=ms_sql_server_trade_data_store,
                                     trade_order_mapping=trade_order_mapping,
                                     trade_order_type=t)

        trade_order_df = data_factory.fetch_table(trade_request)

        assert not trade_order_df.empty \
               and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

        ### Test using DatabaseSourceSQL
        database_source = DatabaseSourceMSSQLServer()

        trade_order_df = database_source.fetch_trade_order_data(
            start_date,
            finish_date,
            ticker,
            database_name=ms_sql_server_trade_data_database_name,
            table_name=trade_order_mapping[t])

        assert not trade_order_df.empty \
               and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')
Example #20
    def get_dataframe_by_key(self,
                             key,
                             combined=True,
                             start_date=None,
                             finish_date=None):
        """Gets a specific trade/order and combine it into a single DataFrame.

        Parameters
        ----------
        key : str
            Which market data ticker or trades/order to return

        combined : bool
            Should we combine all the market data for a specific ticker or trades (or orders) into a single DataFrame before returning?

        Returns
        -------
        DataFrame
        """
        if key in self._df_dict.keys():
            dataframe_key_list = self._df_dict[key]

            if 'df' in key:

                try:
                    df = Mediator.get_volatile_cache(
                        version=tcapy_version).get_dataframe_handle(
                            Mediator.get_util_func().flatten_list_of_lists(
                                dataframe_key_list),
                            burn_after_reading=True)
                except Exception as e:
                    # print("DATAFRAMEHOLDER ERROR" + str(e))
                    df = dataframe_key_list

                if combined:
                    df = Mediator.get_time_series_ops().concat_dataframe_list(
                        df)

                if df is not None:
                    if not (df.empty):
                        df = df.sort_index()

                if start_date is not None and finish_date is not None:
                    df = Mediator.get_time_series_ops(
                    ).filter_start_finish_dataframe(df, start_date,
                                                    finish_date)

                return df
            # elif 'fig' in key:
            #     try:
            #         df = self._volatile_cache.get_dataframe_handle(
            #             self._util_func.flatten_list_of_lists(dataframe_key_list), burn_after_reading=True)
            #     except:
            #         df = dataframe_key_list
            #
            #     if combined:
            #
            #         xy_dict = {}
            #
            #         for fig in df:
            #             for trace in fig['data']:
            #                 name = trace['name']
            #
            #                 xy_dict[name + '_x'] = []
            #                 xy_dict[name + '_y'] = []
            #                 xy_dict['trace_name_list'] = []
            #
            #         for fig in df:
            #             for trace in fig['data']:
            #                 name = trace['name']
            #
            #                 xy_dict[name + '_x'].append(trace['x'])
            #                 xy_dict[name + '_y'].append(trace['y'])
            #
            #                 if name not in xy_dict['trace_name_list']:
            #                     xy_dict['trace_name_list'].append(name)
            #
            #         fig = df[0]
            #
            #         # aggregate all the x & y values
            #         for i in range(0, len(fig['data'])):
            #             name = fig['data'][i]['name']
            #
            #             for j in range(1, len(xy_dict[name + '_x'])):
            #                 fig['data'][i]['x'].extend(xy_dict[name + '_x'])
            #                 fig['data'][i]['y'].extend(xy_dict[name + '_y'])
            #
            #         return fig
            else:
                # Otherwise it's a different type of metadata (don't attempt to combine it)
                try:
                    df = Mediator.get_volatile_cache(
                        version=tcapy_version).get_dataframe_handle(
                            Mediator.get_util_func().flatten_list_of_lists(
                                dataframe_key_list),
                            burn_after_reading=True)
                except Exception as e:
                    print(e)
                    df = dataframe_key_list

                if isinstance(df, list):
                    return df[0]

                return df

        return None
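
A guess at the behaviour of `flatten_list_of_lists` used above (a hypothetical reimplementation for illustration only, not tcapy's code):

def flatten_list_of_lists(list_of_lists):
    flat = []

    for item in list_of_lists:
        if isinstance(item, list):
            flat.extend(item)
        else:
            flat.append(item)

    return flat

assert flatten_list_of_lists([[1, 2], 3, [4]]) == [1, 2, 3, 4]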
Example #21
    def get_trade_order_data(self, tca_request, trade_order_type, start_date=None, finish_date=None, return_cache_handles=True):
        # return_cache_handles returns a pointer

        logger = LoggerManager().getLogger(__name__)
        volatile_cache = Mediator.get_volatile_cache(volatile_cache_engine=self._volatile_cache_engine)

        # By default, assume we want trade data (rather than order data)
        if trade_order_type is None:
            trade_order_type = 'trade_df'

        trade_order_contents = tca_request.trade_order_mapping[trade_order_type]

        cache = True

        # Don't attempt to cache DataFrames (or CSVs of trades)
        if isinstance(trade_order_contents, pd.DataFrame):
            cache = False
        elif isinstance(trade_order_contents, str):
            if 'csv' in trade_order_contents:
                cache = False

        # If we have allowed the caching of monthly/weekly trade data
        if tca_request.multithreading_params['cache_period_trade_data'] and cache:
            old_start_date = tca_request.start_date
            old_finish_date = tca_request.finish_date

            # See if we can fetch from the cache (usually Redis)
            start_date, finish_date, trade_key, trade_df = \
                volatile_cache.get_data_request_cache(
                    tca_request, tca_request.trade_data_store, trade_order_type, tca_request.trade_data_offset_ms)

            # If data is already cached, just return the existing CacheHandle
            if trade_df is not None and start_date == old_start_date and finish_date == old_finish_date:
                return CacheHandle(trade_key, add_time_expiry=False)

            # If it wasn't in the cache then fetch it and push into the cache
            if trade_df is None:
                logger.debug('Key not found for ' + trade_key + ".. now need to load")

                # Call the superclass (get back DataFrames not return_cache_handles)
                trade_df = super(TCATickerLoaderImpl, self).get_trade_order_data(tca_request, trade_order_type,
                                                                                 start_date=start_date,
                                                                                 finish_date=finish_date)

                # Cache this periodic monthly/weekly data
                volatile_cache.put_data_request_cache(tca_request, trade_key, trade_df)

            # Strip off the start/finish dates (because when we load from cache, we get full months)
            trade_df = self._strip_start_finish_dataframe(trade_df, start_date, finish_date, tca_request)
        else:
            if start_date is None or finish_date is None:
                start_date = tca_request.start_date
                finish_date = tca_request.finish_date

            # Call the superclass (get back DataFrames not return_cache_handles)
            trade_df = super(TCATickerLoaderImpl, self).get_trade_order_data(tca_request, trade_order_type,
                                                                             start_date=start_date,
                                                                             finish_date=finish_date)

        if return_cache_handles and tca_request.use_multithreading:
            # Return as a cache handle (which can be easily passed across Celery for example)
            return volatile_cache.put_dataframe_handle(trade_df,
                                                       use_cache_handles=tca_request.multithreading_params['cache_period_trade_data'])

        return trade_df