def test_data_offset():
    """Check that user-specified millisecond offsets are applied to market and trade data.

    Useful when the clocks recording market data vs trade data are slightly out of sync.
    """
    Mediator.get_volatile_cache().clear_cache()

    request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                         trade_data_store=trade_data_store, market_data_store=market_data_store,
                         trade_order_mapping=trade_order_mapping)

    engine = TCAEngineImpl(version=tcapy_version)

    baseline = engine.calculate_tca(tca_request=request)

    # Re-run with the trade data shifted forward 1ms and the market data shifted back 1ms
    request.trade_data_offset_ms = 1
    request.market_data_offset_ms = -1

    shifted = engine.calculate_tca(tca_request=request)

    trade_df = baseline[trade_df_name]
    market_df = baseline['market_df']
    trade_df_offset = shifted[trade_df_name]
    market_df_offset = shifted['market_df']

    # Every timestamp in the offset output should differ by exactly the requested amount
    assert all(market_df.index + timedelta(milliseconds=-1) == market_df_offset.index)
    assert all(trade_df.index + timedelta(milliseconds=1) == trade_df_offset.index)

    # Any date columns carried on the trades should be shifted too
    for col in constants.date_columns:
        if col in trade_df.columns:
            assert all(trade_df[col] + timedelta(milliseconds=1) == trade_df_offset[col])
def fill_market_trade_databases():
    """Populate the market and trade test-harness databases with test data."""
    Mediator.get_volatile_cache().clear_cache()

    # Fill market data (assume: CHUNK_STORE as our default format!). The first ticker
    # replaces the table; subsequent tickers are appended to it.
    table_mode = 'replace'

    for tick in ticker_arctic:
        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=arctic_lib_type)

        # Write CSV to Arctic
        database_source.convert_csv_to_table(csv_market_data_store, tick, market_data_table,
                                             if_exists_table=table_mode, if_exists_ticker='replace',
                                             market_trade_data='market', remove_duplicates=False)

        table_mode = 'append'

    # Fill trade/order data
    database_source = DatabaseSourceMySQL()

    for t in trade_order_list:
        # Dump trade_df to SQL test harness database and overwrite
        database_source.convert_csv_to_table(csv_trade_order_mapping[t], None,
                                             (trade_order_mapping[trade_data_store])[t],
                                             database_name=trade_data_database_name,
                                             if_exists_table='replace', market_trade_data='trade')
def test_market_data_convention(fill_market_trade_databases):
    """Check market data consistency for unusual quotations (e.g. requesting USDEUR should return the
    inverse of EURUSD, which is the correct market convention).
    """
    Mediator.get_volatile_cache().clear_cache()

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    market_request = MarketRequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                   data_store=market_data_store,
                                   market_data_database_table=market_data_database_table)

    #### Compare EURUSD to USDEUR: the inverted USDEUR mid should equal EURUSD's mid
    eurusd_mid = pd.DataFrame(market_loader.get_market_data(market_request)['mid'])

    market_request.ticker = 'USDEUR'
    usdeur_inverted = pd.DataFrame(1.0 / market_loader.get_market_data(market_request)['mid'])

    assert_frame_equal(eurusd_mid, usdeur_inverted, check_dtype=False)

    ### Compare EURJPY (which is autogenerated, if EURJPY is not collected directly) vs. EURUSD & USDJPY
    ### multiplied together — use resampled series for the comparison
    market_request.ticker = 'USDJPY'
    usdjpy_mid = pd.DataFrame(market_loader.get_market_data(market_request)['mid'])

    market_request.ticker = 'EURJPY'
    eurjpy_mid = pd.DataFrame(market_loader.get_market_data(market_request)['mid']).resample('1min').mean()

    eurjpy_synthetic = (eurusd_mid.resample('1min').mean() * usdjpy_mid.resample('1min').mean())

    eurjpy_mid, eurjpy_synthetic = eurjpy_mid.align(eurjpy_synthetic, join='inner')

    diff = (eurjpy_mid - eurjpy_synthetic).dropna()

    assert all(diff < eps)
def test_executed_price_notional_calculation(fill_market_trade_databases):
    """Check that the order-level executed price equals the notional-weighted average executed price
    of the trades belonging to that order.
    """
    Mediator.get_volatile_cache().clear_cache()

    market_df, trade_df, order_df = get_sample_data()

    # Sample 100 random orders, plus the first and last orders to catch boundary cases
    sample = np.random.randint(0, len(order_df.index) - 1, 100).tolist()
    sample.extend([0, -1])

    for i in sample:
        if order_df.iloc[i]['notional'] > 1:
            order_price = order_df.iloc[i]['executed_price']
            order_id = order_df.iloc[i]['id']

            # Trades whose ancestor is this order
            children = trade_df[trade_df['ancestor_pointer_id'] == order_id]

            child_prices = children['executed_price'].fillna(0)
            child_notionals = children['executed_notional'].fillna(0)

            # Notional-weighted average executed price across the child trades
            weighted_avg = (child_prices * child_notionals).sum() / child_notionals.sum()

            assert abs(order_price - weighted_avg) < eps
def test_create_tca_report(fill_market_trade_databases):
    """Tests the creation of a TCAResults, checking it generates the right HTML document
    """
    Mediator.get_volatile_cache().clear_cache()

    # Request a slippage metric plus a timeline results form grouped by date/hour
    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             trade_data_store=trade_data_store,
                             trade_data_database_name=trade_data_database_name,
                             market_data_store=market_data_store,
                             market_data_database_table=market_data_database_table,
                             trade_order_mapping=trade_order_mapping, metric_calcs=MetricSlippage(),
                             results_form=TimelineResultsForm(metric_name='slippage', by_date='datehour'),
                             use_multithreading=use_multithreading)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    tca_results = TCAResults(tca_engine.calculate_tca(tca_request=tca_request), tca_request)
    tca_results.render_computation_charts()

    # Both the timeline data and its rendered charts should have been produced
    assert tca_results.timeline is not None and tca_results.timeline_charts is not None

    tca_report = TCAReport(tca_results)

    html = tca_report.create_report()

    # Quick check to see that the html has been generated by checking existence of the HTML head tag
    assert '<head>' in html
def test_stress_tca(fill_market_trade_databases):
    """Makes several large TCARequests at the same time to stress test tcapy application and also to check it
    works with parallel requests (note: you may need to reduce the length of the dataset if your machine has
    limited amounts of RAM).

    It can be possible that when deployed on the web, several users might make simultaneous requests. Note, do
    not use pylibmc, and instead use python-memcached, when using memcached as a result backend. pylibmc is not
    thread-safe so will come undone if you end up making parallel requests.
    """
    from tcapy.util.swim import Swim

    # Stress testing is optional; skip entirely unless enabled in the test config
    if not (stress_test):
        return

    # Clear cache to ensure all test code runs!
    Mediator.get_volatile_cache().clear_cache()

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=valid_ticker_list,
                             dummy_market=True,
                             trade_data_store=trade_data_store,
                             trade_data_database_name=trade_data_database_name,
                             market_data_store=market_data_store,
                             market_data_database_table=market_data_database_table,
                             trade_order_mapping=trade_order_mapping, use_multithreading=True,
                             tca_type='aggregated')

    # Kick off several simultaneous large TCA requests (copies of the same request)
    request_no = 2

    tca_request_list = []

    for i in range(0, request_no):
        tca_request_list.append(TCARequest(tca_request=tca_request))

    tca_engine = TCAEngineImpl(version=tcapy_version)

    # One worker thread per request, all firing at once
    swim = Swim(parallel_library='thread')
    pool = swim.create_pool(thread_no=len(tca_request_list))

    result = []

    for item in tca_request_list:
        result.append(pool.apply_async(tca_engine.calculate_tca, args=(item, )))

    # Block until every parallel request completes
    output = [p.get() for p in result]

    swim.close_pool(pool, True)

    assert len(output) == len(tca_request_list)

    # Check that several DataFrames exist in the results
    for trade_order_results_df_dict in output:
        assert 'trade_df' in trade_order_results_df_dict.keys()
def test_invalid_dates_missing_data_tca(fill_market_trade_databases):
    """Tests that TCA calculations raise DataMissingException for invalid dates and for tickers with missing
    data, across detailed/aggregated/compliance TCA and both multithreading settings.

    Note that we need a running Celery server for use_multithreading to work (as well as the usual SQL and
    Arctic databases running, if the test_csv option has not been selected). Uses a very large data sample.
    """
    Mediator.get_volatile_cache().clear_cache()  # Clear cache to ensure all test code runs!

    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=valid_ticker_list,
                             trade_data_store=trade_data_store,
                             trade_data_database_name=trade_data_database_name,
                             market_data_store=market_data_store,
                             market_data_database_table=market_data_database_table,
                             trade_order_mapping=trade_order_mapping)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    ## Test invalid dates
    tca_request.start_date = invalid_start_date
    tca_request.finish_date = invalid_finish_date

    for t in tca_type:
        for m in use_multithreading:
            tca_request.use_multithreading = m
            tca_request.tca_type = t

            exception_triggered = True

            try:
                tca_engine.calculate_tca(tca_request=tca_request)

                exception_triggered = False
            except DataMissingException:
                pass

            # BUGFIX: assert AFTER the try/except. Previously the assert lived inside the except
            # block, so it only ran when the exception fired (trivially true) and the test silently
            # passed when no exception was raised at all.
            assert exception_triggered

    ## Test a single valid ticker, but missing data (only one ticker)
    tca_request.start_date = start_date
    tca_request.finish_date = finish_date
    tca_request.ticker = missing_ticker

    for t in tca_type:
        for m in use_multithreading:
            Mediator.get_volatile_cache().clear_cache()  # Clear cache to ensure all test code runs!

            tca_request.use_multithreading = m
            tca_request.tca_type = t

            exception_triggered = True

            try:
                tca_engine.calculate_tca(tca_request=tca_request)

                exception_triggered = False
            except DataMissingException:
                pass

            # Same fix as above: fail if DataMissingException was never raised
            assert exception_triggered
def _convert_tuple_to_market_trade(self, market_trade_order_tuple): volatile_cache = Mediator.get_volatile_cache(volatile_cache_engine=self._volatile_cache_engine) # Gather market and trade/order data (which might be stored in a list) if isinstance(market_trade_order_tuple, list): market_df_list = [] trade_order_holder = DataFrameHolder() for market_df_single, trade_order_holder_single in market_trade_order_tuple: market_df_list.append(market_df_single) trade_order_holder.add_dataframe_holder(trade_order_holder_single) market_df_list = volatile_cache.get_dataframe_handle(market_df_list, burn_after_reading=True) # to ensure that any spurious/None elements are removed market_df_list = [x for x in market_df_list if isinstance(x, pd.DataFrame)] # want to make sure the data is properly ordered too (not guarenteed we'll get it back in right order) market_df = self._time_series_ops.concat_dataframe_list(market_df_list) else: market_df = volatile_cache.get_dataframe_handle(market_trade_order_tuple[0], burn_after_reading=True) trade_order_holder = market_trade_order_tuple[1] return market_df, trade_order_holder
def _get_correct_convention_market_data(self, market_request, start_date=None, finish_date=None):
    """Fetch market data for a ticker that is already quoted in the correct market convention,
    going via the volatile cache when periodic market data caching is enabled.

    Parameters
    ----------
    market_request : MarketRequest
        Ticker/date range/data store describing the market data to fetch
    start_date : datetime, optional
        Overrides market_request.start_date when loading directly (non-cached path)
    finish_date : datetime, optional
        Overrides market_request.finish_date when loading directly (non-cached path)

    Returns
    -------
    DataFrame

    Raises
    ------
    Exception
        If the requested ticker is not in the correct market convention
    """
    # Check that cross is in correct convention
    if self._fx_conv.correct_notation(market_request.ticker) != market_request.ticker:
        raise Exception('Method expecting only crosses in correct market convention')

    cache = True

    # Don't cache when the "data store" is an in-memory DataFrame supplied by the user
    if isinstance(market_request.data_store, pd.DataFrame):
        cache = False

    if market_request.multithreading_params['cache_period_market_data'] and cache:
        volatile_cache = Mediator.get_volatile_cache(volatile_cache_engine=self._volatile_cache_engine)

        # Ask the cache; note it returns (possibly adjusted) start/finish dates alongside the key,
        # and market_df is None on a cache miss
        start_date, finish_date, market_key, market_df = \
            volatile_cache.get_data_request_cache(market_request, market_request.data_store, 'market_df',
                                                  market_request.data_offset_ms)

        if market_df is None:
            # Cache miss: load from the underlying data source, then push into the cache for next time
            market_df = super(TCATickerLoaderImpl, self)._get_underlying_market_data(start_date, finish_date,
                                                                                     market_request)

            volatile_cache.put_data_request_cache(market_request, market_key, market_df)

        # Strip back to the start/finish dates (cached data may cover a wider window than requested)
        return self._strip_start_finish_dataframe(market_df, start_date, finish_date, market_request)
    else:
        if start_date is None or finish_date is None:
            start_date = market_request.start_date
            finish_date = market_request.finish_date

        return super(TCATickerLoaderImpl, self)._get_underlying_market_data(start_date, finish_date,
                                                                            market_request)
def get_market_data(self, market_request, return_cache_handles=False):
    """Fetch market data for a request, optionally going via the volatile cache and optionally returning
    a CacheHandle (a pointer to the DataFrame stored in e.g. Redis) rather than the DataFrame itself.

    Parameters
    ----------
    market_request : MarketRequest or TCARequest
        Describes the market data to fetch (TCARequest objects are detected via their
        market_data_store attribute)
    return_cache_handles : bool
        Return a CacheHandle instead of the DataFrame — only honoured when use_multithreading is set

    Returns
    -------
    DataFrame or CacheHandle
    """
    # Handles returns a pointer
    volatile_cache = Mediator.get_volatile_cache(volatile_cache_engine=self._volatile_cache_engine)

    cache = True

    # Don't attempt to cache DataFrames
    if hasattr(market_request, 'market_data_store'):
        if (isinstance(market_request.market_data_store, pd.DataFrame)):
            cache = False
    elif isinstance(market_request.data_store, pd.DataFrame):
        cache = False

    # If we have allowed the caching of monthly/periodic market data
    if market_request.multithreading_params['cache_period_market_data'] and cache:
        # Remember the caller's dates: the cache may widen them to whole periods
        old_start_date = market_request.start_date
        old_finish_date = market_request.finish_date

        # so we can also take TCARequest objects
        if hasattr(market_request, 'market_data_store'):
            data_store = market_request.market_data_store
            data_offset_ms = market_request.market_data_offset_ms
        else:
            data_store = market_request.data_store
            data_offset_ms = market_request.data_offset_ms

        # See if we can fetch from the cache (typically Redis)
        start_date, finish_date, market_key, market_df = \
            volatile_cache.get_data_request_cache(market_request, data_store, 'market_df', data_offset_ms)

        # If data is already cached, just return the existing CacheHandle (which is like a pointer to the
        # reference in Redis)
        if market_df is not None and start_date == old_start_date and finish_date == old_finish_date \
                and return_cache_handles:
            return CacheHandle(market_key, add_time_expiry=False)

        if market_df is None:
            # Cache miss: fetch the (possibly widened) date range via the superclass, then cache it
            market_request_copy = MarketRequest(market_request=market_request)
            market_request_copy.start_date = start_date
            market_request_copy.finish_date = finish_date

            market_df = super(TCATickerLoaderImpl, self).get_market_data(market_request_copy)

            volatile_cache.put_data_request_cache(market_request_copy, market_key, market_df)

        # Strip back down to the dates the caller originally asked for
        market_df = self._strip_start_finish_dataframe(market_df, old_start_date, old_finish_date,
                                                       market_request)
    else:
        market_df = super(TCATickerLoaderImpl, self).get_market_data(market_request)

    # Return as a cache handle (which can be easily passed across Celery for example)
    # Only if use_multithreading
    if return_cache_handles and market_request.use_multithreading:
        return volatile_cache.put_dataframe_handle(
            market_df, use_cache_handles=market_request.multithreading_params['cache_period_market_data'])

    return market_df
def test_results_form_average(fill_market_trade_databases):
    """Tests averages are calculated correctly by ResultsForm, compared to a direct calculation
    """
    Mediator.get_volatile_cache().clear_cache()

    market_df, trade_df, order_df = get_sample_data()

    trade_df, _ = MetricSlippage().calculate_metric(trade_order_df=trade_df, market_df=market_df,
                                                    bid_benchmark='mid', ask_benchmark='mid')

    # Aggregate slippage by ticker and venue, notional-weighted, scaled to basis points
    results_form = BarResultsForm(market_trade_order_list=['trade_df'], metric_name='slippage',
                                  aggregation_metric='mean', aggregate_by_field=['ticker', 'venue'],
                                  scalar=10000.0,
                                  weighting_field='executed_notional_in_reporting_currency')

    results_df = results_form.aggregate_results(market_trade_order_df=trade_df, market_df=market_df,
                                                market_trade_order_name='trade_df')

    slippage_average = float(results_df[0][0].values[0])

    # Directly calculate the notional-weighted average slippage (in basis points)
    def grab_slippage(trade_df):
        return 10000.0 * ((trade_df['slippage'] * trade_df['executed_notional_in_reporting_currency']).sum() \
            / trade_df['executed_notional_in_reporting_currency'].sum())

    slippage_average_comp = grab_slippage(trade_df)

    # Check the average slippage
    # BUGFIX: compare the ABSOLUTE difference — without abs() an arbitrarily large negative
    # difference would incorrectly pass (sibling tests in this file already use abs())
    assert abs(slippage_average - slippage_average_comp) < eps

    slippage_average_venue = results_df[1][0]['venue'][venue_filter]

    slippage_average_venue_comp = grab_slippage(trade_df[trade_df['venue'] == venue_filter])

    # Check the average slippage by venue (same abs() fix)
    assert abs(slippage_average_venue - slippage_average_venue_comp) < eps
def test_fetch_market_data_db():
    """Tests that we can fetch data from Arctic/KDB/InfluxDB. Note you need to populate the database first
    before running this for the desired dates.
    """
    market_loader = Mediator.get_tca_market_trade_loader()

    market_data_store_list, market_data_database_table_list = _get_db_market_data_store()

    for market_data_store, market_data_database_table in zip(market_data_store_list,
                                                             market_data_database_table_list):
        market_request = MarketRequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                       data_store=market_data_store,
                                       market_data_database_table=market_data_database_table)

        market_df = market_loader.get_market_data(market_request)

        # The loader may hand back a cache handle rather than a DataFrame; resolve it if so
        # (BUGFIX: narrowed from a bare except, which also swallowed KeyboardInterrupt/SystemExit)
        try:
            market_df = Mediator.get_volatile_cache().get_dataframe_handle(market_df)
        except Exception:
            pass

        # Data should be non-empty and lie within the requested (UTC) window
        assert not (market_df.empty) \
               and market_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and market_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

        # An invalid date range should come back as an empty DataFrame
        market_request.start_date = invalid_start_date
        market_request.finish_date = invalid_finish_date

        market_empty_df = market_loader.get_market_data(market_request)

        try:
            market_empty_df = Mediator.get_volatile_cache().get_dataframe_handle(market_empty_df)
        except Exception:
            pass

        assert market_empty_df.empty
def calculate_metrics_single_ticker(self, market_trade_order_combo, tca_request, dummy_market):
    """Compute metrics for a single ticker, wrapping the resulting market and trade/order data in
    volatile-cache handles so they can be passed cheaply across Celery workers."""
    cache = Mediator.get_volatile_cache(version=self._version)

    market_df, trade_order_df_values, ticker, trade_order_df_keys = \
        super(TCATickerLoaderImpl, self).calculate_metrics_single_ticker(
            market_trade_order_combo, tca_request, dummy_market)

    # Swap the heavyweight DataFrames for cache handles (subject to the multithreading params)
    market_handle = cache.put_dataframe_handle(
        market_df, tca_request.multithreading_params['return_cache_handles_market_data'])
    trade_order_handle = cache.put_dataframe_handle(
        trade_order_df_values, tca_request.multithreading_params['return_cache_handles_trade_data'])

    return market_handle, trade_order_handle, ticker, trade_order_df_keys
def calculate_metrics_single_ticker(self, market_trade_order_combo, tca_request, dummy_market):
    """Calculate metrics for a single ticker's market and trade/order data.

    When multithreading is enabled the market and trade/order DataFrames are returned as volatile-cache
    handles (cheap to pass across Celery); when single threaded they are returned directly, since all
    callers share the same memory space.
    """
    volatile_cache = Mediator.get_volatile_cache(volatile_cache_engine=self._volatile_cache_engine)

    market_df, trade_order_df_values, ticker, trade_order_df_keys \
        = super(TCATickerLoaderImpl, self).calculate_metrics_single_ticker(market_trade_order_combo,
                                                                           tca_request, dummy_market)

    if tca_request.use_multithreading:
        # Return as a cache handle (which can be easily passed across Celery for example) or not for the
        # market and trade/order data
        return volatile_cache.put_dataframe_handle(
                   market_df, tca_request.multithreading_params['return_cache_handles_market_data']), \
               volatile_cache.put_dataframe_handle(
                   trade_order_df_values, tca_request.multithreading_params['return_cache_handles_trade_data']), \
               ticker, trade_order_df_keys
    else:
        # For single threading, don't use cache handles (no point, because sharing in the same memory space)
        return market_df, trade_order_df_values, ticker, trade_order_df_keys
from tcapy.analysis.tradeorderfilter import *
from tcapy.vis.tcaresults import TCAResults
from tcapy.vis.report.tcareport import TCAReport

from test.config import *

constants = Constants()
logger = LoggerManager().getLogger(__name__)

from tcapy.util.mediator import Mediator

logger.info('Make sure you have created folder ' + constants.csv_folder + ' & '
            + constants.temp_data_folder + ' otherwise tests will fail')

# Start each test module from a clean volatile cache
Mediator.get_volatile_cache().clear_cache()

########################################################################################################################
# YOU MAY NEED TO CHANGE TESTING PARAMETERS IF YOUR DATABASE DOESN'T COVER THESE DATES
start_date = '01 May 2017'
finish_date = '30 May 2017'

# Single day (with explicit intraday bounds) used by filtering tests
filter_date = '03 May 2017'
start_filter_date = '00:00:00 03 May 2017'
finish_filter_date = '23:59:59 03 May 2017'

# Trade/order data lives in a MySQL test-harness database
trade_data_store = 'mysql'
trade_data_database_name = 'trade_database_test_harness'

# Market tick data lives in an Arctic test-harness database
market_data_store = 'arctic-testharness'
market_data_database_table = 'market_data_table_test_harness'
# See the License for the specific language governing permissions and limitations under the License. # import os from collections import OrderedDict import time from tcapy.util.mediator import Mediator from tcapy.conf.constants import Constants constants = Constants() folder = constants.test_data_harness_folder volatile_cache = Mediator.get_volatile_cache() def tca_example_csv_trade_data_dukascopy(): """Loads up trade/order data from CSV files and market data externally from Dukascopy. Does not use any databases, if you rarely use TCA, this is fine. However, for heavy use of TCA, we strongly recommend maintaining an internal tick database, as external downloading of data can be very slow. In this case we are simply calculating the slippage of every trade and orders above them. """ from tcapy.analysis.tcaengine import TCAEngineImpl from tcapy.analysis.tcarequest import TCARequest from tcapy.analysis.algos.benchmark import BenchmarkArrival, BenchmarkSpreadToMid from tcapy.analysis.algos.metric import MetricSlippage
def test_multithreading_full_basic_tca():
    """Tests if the trade/order and market data is identical for multithreading versus singlethreading for
    detailed, aggregated and compliance TCA. Note that we need a running Celery server for multithreading to
    work (as well as the usual SQL and Arctic databases running, if the test_csv option has not been selected).
    Uses a very large data sample.
    """
    Mediator.get_volatile_cache().clear_cache()  # clear cache to ensure all test code runs!

    tca_request = TCARequest(start_date=multithreading_start_date, finish_date=multithreading_finish_date,
                             ticker=valid_ticker_list, trade_data_store=trade_data_store,
                             market_data_store=market_data_store, trade_order_mapping=trade_order_mapping)

    tca_engine = TCAEngineImpl(version=tcapy_version)

    tca_type = ['aggregated', 'detailed', 'compliance']
    multithreading = [True, False]

    #### Check the executed prices match between the single and multithreaded cases
    for t in tca_type:
        dict_list = []

        for m in multithreading:
            # Clear cache to ensure all test code runs!
            Mediator.get_volatile_cache(version=tcapy_version).clear_cache()

            tca_request.use_multithreading = m
            tca_request.tca_type = t

            dict_list.append(tca_engine.calculate_tca(tca_request=tca_request))

        print("tca_request " + t)

        # dict_list[0] is the multithreaded run, dict_list[1] the singlethreaded run
        for k in dict_list[0].keys():
            multi_df = dict_list[0][k]
            single_df = dict_list[1][k]

            if isinstance(single_df, pd.DataFrame) and isinstance(multi_df, pd.DataFrame):
                if 'executed_price' in single_df.columns and 'executed_price' in multi_df.columns:
                    print("tablecomputation " + k)

                    exec_multi = multi_df['executed_price'].dropna()
                    exec_single = single_df['executed_price'].dropna()

                    if dump_csv_output:
                        df = pd.DataFrame(exec_multi)
                        df1 = pd.DataFrame(exec_single)
                        df1.columns = [x + '_single' for x in df1.columns]

                        df = df.join(pd.DataFrame(df1), how='outer')
                        df.to_csv(k + "_test.csv")

                        df_large = single_df.join(multi_df, lsuffix='_single', rsuffix='_multi', how='outer')
                        df_large.to_csv(k + "_test_full.csv")

                    # BUGFIX: compare the ABSOLUTE difference — a large negative difference previously passed
                    assert all(abs(exec_multi - exec_single) < eps)

                # Only check trade/orders and not any of the other DataFrames returned
                if 'id' in multi_df.columns and 'id' in single_df.columns:
                    # check we have unique IDs (we should have unique IDs for every event_type trade (but the
                    # id's will be the same for placements)
                    id_multi = multi_df['id']
                    id_single = single_df['id']

                    # BUGFIX: this previously compared len(id_multi.index) against ITSELF (trivially true);
                    # the intent is that both runs produce the same number of rows
                    assert len(id_multi.index) == len(id_single.index)
                    assert len(id_multi.unique()) == len(id_single.index)
                    assert len(id_multi.index) == len(id_single.unique())
def test_fetch_market_trade_data_csv():
    """Tests downloading of market and trade/order data from CSV files
    """
    ### Get market data
    market_loader = Mediator.get_tca_market_trade_loader()

    market_request = MarketRequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                   data_store=csv_market_data_store)

    market_df = market_loader.get_market_data(market_request)

    assert not (market_df.empty) \
           and market_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
           and market_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

    # For a high level trade data request, we need to use TCA request, because it usually involves some
    # market data download (we are assuming that the market data is being downloaded from our arctic database)
    # eg. for converting notionals to reporting currency
    tca_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             trade_data_store='csv', market_data_store=arctic_market_data_store,
                             trade_order_mapping=csv_trade_order_mapping)

    for t in trade_order_list:
        trade_order_df = market_loader.get_trade_order_data(tca_request, t)

        # May be returned as a cache handle; resolve it if so
        # (BUGFIX: narrowed from a bare except, which also swallowed KeyboardInterrupt/SystemExit)
        try:
            trade_order_df = Mediator.get_volatile_cache().get_dataframe_handle(trade_order_df)
        except Exception:
            pass

        assert not trade_order_df.empty \
               and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

    ### Test using DataFactory and DatabaseSource
    from tcapy.data.datafactory import DataFactory

    data_factory = DataFactory()

    for t in trade_order_list:
        ### Test using DataFactory
        trade_request = TradeRequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                     data_store='csv', trade_order_mapping=csv_trade_order_mapping,
                                     trade_order_type=t)

        trade_order_df = data_factory.fetch_table(trade_request)

        assert not trade_order_df.empty \
               and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

        ### Test using DatabaseSourceCSV
        from tcapy.data.databasesource import DatabaseSourceCSV

        database_source = DatabaseSourceCSV()

        trade_order_df = database_source.fetch_trade_order_data(
            start_date, finish_date, ticker, table_name=csv_trade_order_mapping[t])

        assert not trade_order_df.empty \
               and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')
def test_fetch_trade_data_ms_sql_server():
    """Tests that we can fetch data from the Microsoft SQL Server database. Note you need to populate the
    database first before running this for the desired dates.
    """
    # Optional: skip unless MS SQL Server testing is enabled in the test config
    if not (run_ms_sql_server_tests):
        return

    from tcapy.data.datafactory import DataFactory
    from tcapy.data.databasesource import DatabaseSourceMSSQLServer

    ### Test using TCAMarketTradeLoader
    market_loader = Mediator.get_tca_market_trade_loader()

    for t in trade_order_list:
        trade_order_mapping = {t: ms_sql_server_trade_order_mapping[t]}

        trade_request = TCARequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                   trade_data_store=ms_sql_server_trade_data_store,
                                   trade_order_mapping=trade_order_mapping,
                                   market_data_store=arctic_market_data_store,
                                   use_multithreading=use_multithreading)

        trade_order_df = market_loader.get_trade_order_data(trade_request, t)

        # May be returned as a cache handle; resolve it if so
        # (BUGFIX: narrowed from a bare except, which also swallowed KeyboardInterrupt/SystemExit)
        try:
            trade_order_df = Mediator.get_volatile_cache().get_dataframe_handle(trade_order_df)
        except Exception:
            pass

        assert not trade_order_df.empty \
               and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

        ### Test using DataFactory
        data_factory = DataFactory()

        trade_request = TradeRequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                     data_store=ms_sql_server_trade_data_store,
                                     trade_order_mapping=trade_order_mapping, trade_order_type=t)

        trade_order_df = data_factory.fetch_table(trade_request)

        assert not trade_order_df.empty \
               and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

        ### Test using DatabaseSourceSQL
        database_source = DatabaseSourceMSSQLServer()

        trade_order_df = database_source.fetch_trade_order_data(
            start_date, finish_date, ticker, database_name=ms_sql_server_trade_data_database_name,
            table_name=trade_order_mapping[t])

        assert not trade_order_df.empty \
               and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')
def get_dataframe_by_key(self, key, combined=True, start_date=None, finish_date=None):
    """Gets a specific trade/order and combine it into a single DataFrame.

    Parameters
    ----------
    key : str
        Which market data ticker or trades/order to return

    combined : True
        Should we combine all the market data for a specific ticker or trades (or orders) into a single
        DataFrame before returning?

    start_date : datetime, optional
        If supplied together with finish_date, filter the combined DataFrame to this window

    finish_date : datetime, optional
        See start_date

    Returns
    -------
    DataFrame (or the raw stored object for metadata keys; None when the key is unknown)
    """
    if key in self._df_dict.keys():
        dataframe_key_list = self._df_dict[key]

        # DataFrame-style entries ('df' in the key) may be stored as volatile-cache handles
        if 'df' in key:
            try:
                # Resolve cache handles into DataFrames (removing them from the cache)
                df = Mediator.get_volatile_cache(version=tcapy_version).get_dataframe_handle(
                    Mediator.get_util_func().flatten_list_of_lists(dataframe_key_list),
                    burn_after_reading=True)
            except Exception as e:
                # Not stored as handles — use the list of DataFrames as-is
                # print("DATAFRAMEHOLDER ERROR" + str(e))
                df = dataframe_key_list

            if combined:
                df = Mediator.get_time_series_ops().concat_dataframe_list(df)

                if df is not None:
                    if not (df.empty):
                        # Ensure chronological order before any date filtering
                        df = df.sort_index()

                        if start_date is not None and finish_date is not None:
                            df = Mediator.get_time_series_ops().filter_start_finish_dataframe(
                                df, start_date, finish_date)

            return df

        # elif 'fig' in key:
        #     try:
        #         df = self._volatile_cache.get_dataframe_handle(
        #             self._util_func.flatten_list_of_lists(dataframe_key_list), burn_after_reading=True)
        #     except:
        #         df = dataframe_key_list
        #
        #     if combined:
        #
        #         xy_dict = {}
        #
        #         for fig in df:
        #             for trace in fig['data']:
        #                 name = trace['name']
        #
        #                 xy_dict[name + '_x'] = []
        #                 xy_dict[name + '_y'] = []
        #                 xy_dict['trace_name_list'] = []
        #
        #         for fig in df:
        #             for trace in fig['data']:
        #                 name = trace['name']
        #
        #                 xy_dict[name + '_x'].append(trace['x'])
        #                 xy_dict[name + '_y'].append(trace['y'])
        #
        #                 if name not in xy_dict['trace_name_list']:
        #                     xy_dict['trace_name_list'].append(name)
        #
        #         fig = df[0]
        #
        #         # aggregate all the x & y values
        #         for i in range(0, len(fig['data'])):
        #             name = fig['data'][i]['name']
        #
        #             for j in range(1, len(xy_dict[name + '_x'])):
        #                 fig['data'][i]['x'].extend(xy_dict[name + '_x'])
        #                 fig['data'][i]['y'].extend(xy_dict[name + '_y'])
        #
        #         return fig
        else:
            # otherwise different type of metadata (don't attempt to combine it)
            try:
                df = Mediator.get_volatile_cache(version=tcapy_version).get_dataframe_handle(
                    Mediator.get_util_func().flatten_list_of_lists(dataframe_key_list),
                    burn_after_reading=True)
            except Exception as e:
                print(e)

                df = dataframe_key_list

            # Metadata may come back wrapped in a list; unwrap a single element
            if isinstance(df, list):
                return df[0]

            return df

    return None
def get_trade_order_data(self, tca_request, trade_order_type, start_date=None, finish_date=None,
                         return_cache_handles=True):
    """Fetch trade or order data for a TCA request, going via the volatile cache when periodic trade data
    caching is enabled, optionally returning a CacheHandle (a pointer to the data in e.g. Redis) rather
    than a DataFrame.

    Parameters
    ----------
    tca_request : TCARequest
        Describes the trade/order data to fetch
    trade_order_type : str
        Which trade/order table to fetch (defaults to 'trade_df' when None)
    start_date : datetime, optional
        Overrides tca_request.start_date when loading directly (non-cached path)
    finish_date : datetime, optional
        Overrides tca_request.finish_date when loading directly (non-cached path)
    return_cache_handles : bool
        Return a CacheHandle instead of the DataFrame (only honoured when use_multithreading is set)

    Returns
    -------
    DataFrame or CacheHandle
    """
    # return_cache_handles returns a pointer
    logger = LoggerManager().getLogger(__name__)

    volatile_cache = Mediator.get_volatile_cache(volatile_cache_engine=self._volatile_cache_engine)

    # by default, assume we want trade data (rather than order data)
    if trade_order_type is None:
        trade_order_type = 'trade_df'

    trade_order_contents = tca_request.trade_order_mapping[trade_order_type]

    cache = True

    # Don't attempt to cache DataFrames (or CSVs of trades) supplied directly by the user
    if isinstance(trade_order_contents, pd.DataFrame):
        cache = False
    elif isinstance(trade_order_contents, str):
        if 'csv' in trade_order_contents:
            cache = False

    # If we have allowed the caching of monthly/weekly trade data
    if tca_request.multithreading_params['cache_period_trade_data'] and cache:
        # Remember the caller's dates: the cache may widen them to whole periods
        old_start_date = tca_request.start_date
        old_finish_date = tca_request.finish_date

        # See if we can fetch from the cache (usually Redis)
        start_date, finish_date, trade_key, trade_df = \
            volatile_cache.get_data_request_cache(
                tca_request, tca_request.trade_data_store, trade_order_type,
                tca_request.trade_data_offset_ms)

        # If data is already cached, just return the existing CacheHandle
        # NOTE(review): this returns a CacheHandle even when return_cache_handles=False — confirm
        # callers on this path always expect a handle
        if trade_df is not None and start_date == old_start_date and finish_date == old_finish_date:
            return CacheHandle(trade_key, add_time_expiry=False)

        # If it wasn't in the cache then fetch it and push into the cache
        if trade_df is None:
            logger.debug('Key not found for ' + trade_key + ".. now need to load")

            # Call the superclass (get back DataFrames not return_cache_handles)
            trade_df = super(TCATickerLoaderImpl, self).get_trade_order_data(tca_request, trade_order_type,
                                                                             start_date=start_date,
                                                                             finish_date=finish_date)

            # Cache this periodic monthly/weekly data
            volatile_cache.put_data_request_cache(tca_request, trade_key, trade_df)

        # Strip off the start/finish dates (because when we load from cache, we get full months)
        trade_df = self._strip_start_finish_dataframe(trade_df, start_date, finish_date, tca_request)
    else:
        if start_date is None or finish_date is None:
            start_date = tca_request.start_date
            finish_date = tca_request.finish_date

        # Call the superclass (get back DataFrames not return_cache_handles)
        trade_df = super(TCATickerLoaderImpl, self).get_trade_order_data(tca_request, trade_order_type,
                                                                         start_date=start_date,
                                                                         finish_date=finish_date)

    if return_cache_handles and tca_request.use_multithreading:
        # Return as a cache handle (which can be easily passed across Celery for example)
        return volatile_cache.put_dataframe_handle(
            trade_df, use_cache_handles=tca_request.multithreading_params['cache_period_trade_data'])

    return trade_df