Пример #1
0
    def __init__(self,
                 market_data_postfix='dukascopy',
                 csv_market_data=None,
                 write_to_db=True,
                 sql_trade_database_type='ms_sql_server'):
        """Sets the market data source name, fetches the market/trade loader and creates helper objects.

        Parameters
        ----------
        market_data_postfix : str (default 'dukascopy')
            Appended to 'arctic-' to name the market data source (when no CSV is given) and passed to
            DatabaseSourceArctic when write_to_db is set

        csv_market_data : str (default None)
            If given, used as the market data source in place of the 'arctic-' name

        write_to_db : bool (default True)
            If set, the market/trade database source objects and database/table names are created

        sql_trade_database_type : str (default 'ms_sql_server')
            'ms_sql_server' or 'mysql'; for any other value no trade data source is set
        """
        # A CSV source, when given, takes precedence over the Arctic source name
        self._market_data_source = (csv_market_data
                                    if csv_market_data is not None else
                                    'arctic-' + market_data_postfix)

        self._tca_market = Mediator.get_tca_market_trade_loader()

        # Assumes MongoDB for tick data and an SQL database for trade/order data
        if write_to_db:
            # Market data source
            self._database_source_market = DatabaseSourceArctic(postfix=market_data_postfix)
            self._market_data_database_name = constants.arctic_market_data_database_name
            self._market_data_database_table = constants.arctic_market_data_database_table

            # Trade data source (left unset when the type is not recognised)
            if sql_trade_database_type == 'ms_sql_server':
                self._database_source_trade = DatabaseSourceMSSQLServer()
                self._trade_data_database_name = constants.ms_sql_server_trade_data_database_name
            elif sql_trade_database_type == 'mysql':
                self._database_source_trade = DatabaseSourceMySQL()
                self._trade_data_database_name = constants.mysql_trade_data_database_name

        self.time_series_ops = TimeSeriesOps()
        self.rand_time_series = RandomiseTimeSeries()
Пример #2
0
def test_write_market_data_arctic():
    """Tests we can write market data to Arctic

    For every Arctic library type the CSV is written twice, first replacing the whole table and then
    appending to it. After each write the 'mid' column read back through the market loader is compared
    with the 'mid' column fetched directly from the CSV.
    """
    if not run_arctic_tests:
        return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can read data from CSV and dump to Arctic (and when read back it matches CSV)
    db_start_date = '01 Jan 2016'
    db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # Check first when replacing full table and then appending
    for a in arctic_lib_type:
        for i in ('replace', 'append'):

            database_source = DatabaseSourceArctic(postfix='testharness',
                                                   arctic_lib_type=a)

            # Write CSV to Arctic
            database_source.convert_csv_to_table(
                csv_market_data_store,
                ticker,
                test_harness_arctic_market_data_table,
                if_exists_table=i,
                if_exists_ticker='replace',
                market_trade_data='market',
                remove_duplicates=False)

            # Fetch data directly from CSV
            database_source_csv = DatabaseSourceCSV(
                market_data_database_csv=csv_market_data_store)

            market_df_csv = database_source_csv.fetch_market_data(
                start_date=db_start_date,
                finish_date=db_finish_date,
                ticker=ticker)

            # Read back data from Arctic and compare with CSV
            market_request = MarketRequest(
                start_date=db_start_date,
                finish_date=db_finish_date,
                ticker=ticker,
                data_store=database_source,
                market_data_database_table=test_harness_arctic_market_data_table)

            market_df_load = market_loader.get_market_data(
                market_request=market_request)

            diff_df = market_df_csv['mid'] - market_df_load['mid']

            # Dump the differences to a CSV in the working directory (kept from the original)
            diff_df.to_csv('test' + i + '.csv')

            # Compare the absolute difference: a signed comparison would let any large
            # negative deviation (Arctic value above CSV value) pass unnoticed
            assert all(diff_df.abs() < eps)
Пример #3
0
def example_arctic_dukacopy_download():
    """Example of fetching market data straight from the lower level Arctic wrapper (DatabaseSourceArctic),
    instead of going through a higher level class such as TCAMarketTradeDataLoader.

    Fetches EURUSD with the 'dukascopy' postfix for 01 May 2017 - 30 May 2017 and prints the result.
    """
    start_date, finish_date = '01 May 2017', '30 May 2017'

    arctic_source = DatabaseSourceArctic(postfix='dukascopy')

    market_df = arctic_source.fetch_market_data(start_date, finish_date, ticker='EURUSD')

    print(market_df)
Пример #4
0
def example_arctic_ncfx_download():
    """Example of fetching market data straight from the lower level Arctic wrapper (DatabaseSourceArctic),
    instead of going through a higher level class such as TCAMarketTradeDataLoader.

    Uses the 'ncfx' postfix with the module level short_start_date, short_finish_date and ticker,
    and prints the result.
    """
    arctic_source = DatabaseSourceArctic(postfix='ncfx')

    market_df = arctic_source.fetch_market_data(
        short_start_date, short_finish_date, ticker=ticker)

    print(market_df)
Пример #5
0
def fill_market_trade_databases():
    """Fills the market and trade databases with test data

    The volatile cache is cleared first. Market data CSVs are then written to Arctic for every ticker in
    ticker_arctic, and each trade/order CSV is written to MySQL, replacing the existing table.
    """
    Mediator.get_volatile_cache().clear_cache()

    # Fill market data (assume: CHUNK_STORE as our default format!)
    for position, market_ticker in enumerate(ticker_arctic):
        arctic_source = DatabaseSourceArctic(postfix='testharness',
                                             arctic_lib_type=arctic_lib_type)

        # Only the first ticker replaces the table, later tickers are appended to it
        table_mode = 'replace' if position == 0 else 'append'

        # Write CSV to Arctic
        arctic_source.convert_csv_to_table(
            csv_market_data_store,
            market_ticker,
            market_data_table,
            if_exists_table=table_mode,
            if_exists_ticker='replace',
            market_trade_data='market',
            remove_duplicates=False)

    # Fill trade/order data
    sql_source = DatabaseSourceMySQL()

    for trade_order_name in trade_order_list:
        # Dump each trade/order CSV to the SQL test harness database and overwrite
        sql_source.convert_csv_to_table(
            csv_trade_order_mapping[trade_order_name],
            None,
            trade_order_mapping[trade_data_store][trade_order_name],
            database_name=trade_data_database_name,
            if_exists_table='replace',
            market_trade_data='trade')
Пример #6
0
def test_delete_market_data_arctic():
    """Tests we can delete a section of market data for a particular ticker from Arctic

    The CSV is written to Arctic, a date window is deleted through delete_market_data, and what is read
    back is compared with the originally stored data filtered by the same window in pandas.
    """
    if not run_arctic_tests:
        return

    for lib_type in arctic_lib_type:
        arctic_source = DatabaseSourceArctic(postfix='testharness',
                                             arctic_lib_type=lib_type)

        db_start_date = '01 Jan 2016'
        db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

        # Write test market data CSV to arctic first
        arctic_source.convert_csv_to_table(
            csv_market_data_store,
            ticker,
            test_harness_arctic_market_data_table,
            if_exists_table='replace',
            if_exists_ticker='replace',
            market_trade_data='market',
            remove_duplicates=False)

        db_start_cut_off = '26 Apr 2017 00:00'
        db_finish_cut_off = '27 Apr 2017 00:50'

        # Expected result: everything stored, minus the window we are about to delete
        market_df_expected = arctic_source.fetch_market_data(
            start_date=db_start_date,
            finish_date=db_finish_date,
            ticker=ticker,
            table_name=test_harness_arctic_market_data_table)

        before_window = market_df_expected.index <= db_start_cut_off
        after_window = market_df_expected.index >= db_finish_cut_off

        market_df_expected = market_df_expected.loc[before_window | after_window]

        # Do it with Arctic (note: underneath this will just use pandas, as can't do on database deletion with Arctic)
        arctic_source.delete_market_data(
            ticker,
            start_date=db_start_cut_off,
            finish_date=db_finish_cut_off,
            table_name=test_harness_arctic_market_data_table)

        # Read back data from database (will exclude the deleted records)
        market_df_actual = arctic_source.fetch_market_data(
            start_date=db_start_date,
            finish_date=db_finish_date,
            ticker=ticker,
            table_name=test_harness_arctic_market_data_table)

        # Sort columns so both frames have them in the same order before comparing
        assert_frame_equal(market_df_expected.sort_index(axis=1),
                           market_df_actual.sort_index(axis=1))
Пример #7
0
def test_append_market_data_arctic():
    """Tests we can append market data to arctic (we will have already written data to the test harness database)

    The CSV market data is split into two chunks. The first chunk is written, writing it a second time
    in append mode must raise ErrorWritingOverlapDataException, and then the second chunk is appended.
    The 'mid' column read back is finally compared with the 'mid' column of the CSV.
    """
    if not run_arctic_tests:
        return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can append (non-overlapping) data to Arctic
    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # Use this market request later when reading back from Arctic
    market_request = MarketRequest(start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker,
                                   data_store=test_harness_arctic_market_data_store,
                                   market_data_database_table=test_harness_arctic_market_data_table)

    # Load data from CSV for comparison later
    database_source_csv = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

    market_df_csv = database_source_csv.fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    market_df_list = TimeSeriesOps().split_array_chunks(market_df_csv, chunks=2)

    for a in arctic_lib_type:

        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

        market_df_lower = market_df_list[0]
        market_df_higher = market_df_list[1]

        database_source.append_market_data(market_df_lower, ticker, table_name=test_harness_arctic_market_data_table,
                                           if_exists_table='replace', if_exists_ticker='replace', remove_duplicates=False)

        overlap_error = False

        ## Try to append overlapping data (this will fail!)
        try:
            database_source.append_market_data(market_df_lower, ticker,
                                               table_name=test_harness_arctic_market_data_table,
                                               if_exists_table='append', if_exists_ticker='append', remove_duplicates=False)
        except ErrorWritingOverlapDataException:
            overlap_error = True

        assert overlap_error

        # Append non-overlapping data which follows (writing overlapping data into Arctic will mess up the datastore!)
        database_source.append_market_data(market_df_higher, ticker, table_name=test_harness_arctic_market_data_table,
                                           if_exists_table='append', if_exists_ticker='append', remove_duplicates=False)

        market_df_all_read_back = market_loader.get_market_data(market_request=market_request)

        # Compare the absolute difference: a signed comparison would let any large
        # negative deviation pass unnoticed
        diff_df = market_df_all_read_back['mid'] - market_df_csv['mid']

        assert all(diff_df.abs() < eps)
Пример #8
0
def write_mongo_db_atlas_arctic():
    """Writes market data to Arctic/MongoDB on Atlas (cloud) and prints what is read back

    The CSV is written through a DatabaseSourceArctic created with arctic_connection_string and the
    CHUNK_STORE library type. The data is then loaded again through the market loader and printed
    (nothing is asserted).
    """
    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    db_start_date = '01 Jan 2016'
    db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    atlas_source = DatabaseSourceArctic(postfix='testharness',
                                        arctic_lib_type='CHUNK_STORE',
                                        connection_string=arctic_connection_string)

    # Write CSV to Arctic
    atlas_source.convert_csv_to_table(
        csv_market_data_store,
        ticker,
        test_harness_arctic_market_data_table,
        if_exists_table='replace',
        if_exists_ticker='replace',
        market_trade_data='market',
        remove_duplicates=False)

    # Read back data from Arctic, using the same database source object as the data store
    read_back_request = MarketRequest(
        start_date=db_start_date,
        finish_date=db_finish_date,
        ticker=ticker,
        data_store=atlas_source,
        market_data_database_table=test_harness_arctic_market_data_table)

    print(market_loader.get_market_data(market_request=read_back_request))
Пример #9
0
def test_write_chunked_market_data_arctic():
    """For very large CSV files we might need to read them in chunks. tcapy supports this and also supports CSVs
    which are sorted in reverse (ie. descending). We need to enable chunking and reverse reading with flags.

    This tests whether chunked data is written correctly to Arctic, comparing it with that read from CSV directly.
    The comparison is done on the absolute difference of the 'mid' column.
    """
    if not run_arctic_tests:
        return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    def read_back_from_arctic():
        # A fresh request is built for every read, as the original code did
        market_request = MarketRequest(start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker,
                                       data_store=test_harness_arctic_market_data_store,
                                       market_data_database_table=test_harness_arctic_market_data_table)

        return market_loader.get_market_data(market_request=market_request)

    # Load data from CSVs directly (for comparison later)
    market_df_csv_desc = DatabaseSourceCSV(market_data_database_csv=csv_reverse_market_data_store).fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    market_df_csv_asc = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store).fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    for a in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

        ### Write CSV data to Arctic which is sorted ascending (default!)
        database_source.convert_csv_to_table(csv_market_data_store, ticker,
                                             test_harness_arctic_market_data_table,
                                             if_exists_table='replace',
                                             if_exists_ticker='replace', market_trade_data='market',
                                             csv_read_chunksize=100000, remove_duplicates=False)

        market_df_load = read_back_from_arctic()

        # Compare reading directly from the CSV vs. reading back from arctic; the absolute
        # difference is used because a signed comparison would let negative deviations pass
        assert all((market_df_csv_asc['mid'] - market_df_load['mid']).abs() < eps)

        ### Write CSV data to Arctic which is sorted descending
        database_source.convert_csv_to_table(csv_reverse_market_data_store, ticker,
                                             test_harness_arctic_market_data_table,
                                             if_exists_table='append',
                                             if_exists_ticker='replace', market_trade_data='market',
                                             csv_read_chunksize=100000, read_in_reverse=True, remove_duplicates=False)

        market_df_load = read_back_from_arctic()

        # Compare reading directly from the CSV vs. reading back from arctic
        assert all((market_df_csv_desc['mid'] - market_df_load['mid']).abs() < eps)
Пример #10
0
def test_randomized_trade_data_generation():
    """Tests randomized trade generation data (without writing to database)

    The creator is built with write_to_db=False, so its market data source has to be set by hand.
    """
    # Pass the postfix so the creator's 'arctic-<postfix>' source name matches the Arctic source used below
    data_test_creator = DataTestCreator(market_data_postfix=postfix,
                                        write_to_db=False)

    # Use either the test CSV or Arctic as the source of market data
    if use_test_csv:
        data_test_creator._database_source_market = DatabaseSourceCSV(
            market_data_database_csv=market_data_store)

        # create_test_trade_order loads market data via _market_data_source, so it must
        # point at the CSV too, otherwise the Arctic source name would still be used
        data_test_creator._market_data_source = market_data_store
    else:
        data_test_creator._database_source_market = DatabaseSourceArctic(
            postfix=postfix)

    # Create randomised trade/order data
    trade_order = data_test_creator.create_test_trade_order(
        ticker, start_date=start_date, finish_date=finish_date)

    # trade_order is a dictionary holding trade_df and order_df

    # Make sure the number of trades > number of orders
    assert (len(trade_order['trade_df'].index) > len(
        trade_order['order_df'].index))
def test_randomized_trade_data_generation():
    """Tests randomized trade generation data (without writing to database)

    The creator is built with write_to_db=False and its market data source is then set by hand to either
    the test CSV or Arctic.
    """
    from tcapy.data.datatestcreator import DataTestCreator

    creator = DataTestCreator(
        market_data_postfix=postfix,
        write_to_db=False,
        market_data_database_table=test_harness_arctic_market_data_table,
        trade_data_database_name=test_harness_mysql_trade_data_database)

    # Pick where the market data comes from
    if not use_market_data_test_csv:
        creator._database_source_market = DatabaseSourceArctic(postfix=postfix)
    else:
        creator._database_source_market = DatabaseSourceCSV(market_data_database_csv=market_data_store)
        creator._market_data_source = market_data_store

    # Create randomised trade/order data (a dictionary holding trade_df and order_df)
    trade_order = creator.create_test_trade_order(ticker, start_date=start_date, finish_date=finish_date)

    no_of_trades = len(trade_order['trade_df'].index)
    no_of_orders = len(trade_order['order_df'].index)

    # Make sure the number of trades > number of orders
    assert (no_of_trades > no_of_orders)
Пример #12
0
class DataTestCreator(object):
    """This class copies market data/trade data to our database (by default: Arctic/MongoDB for market data and
    MSSQL for trade data). It generates randomised test trades/orders based upon the market data, randomly perturbing
    the bid/ask to simulate a traded price.

    """
    def __init__(self,
                 market_data_postfix='dukascopy',
                 csv_market_data=None,
                 write_to_db=True,
                 sql_trade_database_type='ms_sql_server'):
        """Sets the market data source name, fetches the market/trade loader and creates helper objects.

        Parameters
        ----------
        market_data_postfix : str (default 'dukascopy')
            Appended to 'arctic-' to name the market data source (when no CSV is given) and passed to
            DatabaseSourceArctic when write_to_db is set

        csv_market_data : str (default None)
            If given, used as the market data source in place of the 'arctic-' name

        write_to_db : bool (default True)
            If set, the market/trade database source objects and database/table names are created

        sql_trade_database_type : str (default 'ms_sql_server')
            'ms_sql_server' or 'mysql'; for any other value no trade data source is set
        """
        # A CSV source, when given, takes precedence over the Arctic source name
        self._market_data_source = (csv_market_data
                                    if csv_market_data is not None else
                                    'arctic-' + market_data_postfix)

        self._tca_market = Mediator.get_tca_market_trade_loader()

        # Assumes MongoDB for tick data and an SQL database for trade/order data
        if write_to_db:
            # Market data source
            self._database_source_market = DatabaseSourceArctic(postfix=market_data_postfix)
            self._market_data_database_name = constants.arctic_market_data_database_name
            self._market_data_database_table = constants.arctic_market_data_database_table

            # Trade data source (left unset when the type is not recognised)
            if sql_trade_database_type == 'ms_sql_server':
                self._database_source_trade = DatabaseSourceMSSQLServer()
                self._trade_data_database_name = constants.ms_sql_server_trade_data_database_name
            elif sql_trade_database_type == 'mysql':
                self._database_source_trade = DatabaseSourceMySQL()
                self._trade_data_database_name = constants.mysql_trade_data_database_name

        self.time_series_ops = TimeSeriesOps()
        self.rand_time_series = RandomiseTimeSeries()

    def populate_test_database_with_csv(self,
                                        csv_market_data=None,
                                        ticker=None,
                                        csv_trade_data=None,
                                        if_exists_market_table='append',
                                        if_exists_market_ticker='replace',
                                        if_exists_trade_table='replace',
                                        market_data_postfix='dukascopy',
                                        remove_market_duplicates=False):
        """Writes market data and trade/order data, sourced from CSV/HDF5 files, to the market database and
        the trade database respectively. Either part is skipped when its CSV argument is None.

        Parameters
        ----------
        csv_market_data : str (list)
            Path of CSV/HDF5 file with market data

        ticker : str (list)
            Ticker for market data

        csv_trade_data : dict
            Maps the name of each trade/order table (eg. trade_df) to the path of the CSV/HDF5 file with its
            data. If something other than a dict is given, an error is logged and nothing is written

        if_exists_market_table : str
            'replace' - deletes whole market data table
            'append' (default) - adds to existing market data

        if_exists_market_ticker : str
            'replace' (default) - deletes existing data for the ticker
            'append' - appends data for this ticker

        if_exists_trade_table : str
            'replace' - deletes data in trade table, before writing

        market_data_postfix : str (default 'dukascopy')
            Data source for market data (typically broker or venue name)

        remove_market_duplicates : bool (default: False)
            Should we remove any duplicated values in market data (for TCA purposes, we can usually remove
            duplicated values). However, we need to be careful when using richer market data (eg. with volume
            data), where consecutive prices might be the same but have different volume/other fields

        Returns
        -------
        None
        """

        logger = LoggerManager.getLogger(__name__)

        # Market data (eg. spot data)
        if csv_market_data is not None:
            self._database_source_market.set_postfix(market_data_postfix)
            logger.info('Writing market data to database')

            self._database_source_market.convert_csv_to_table(
                csv_market_data, ticker, self._market_data_database_table,
                database_name=self._market_data_database_name,
                if_exists_table=if_exists_market_table,
                if_exists_ticker=if_exists_market_ticker,
                remove_duplicates=remove_market_duplicates)

        # Test trade/order data (which will have been randomly generated)
        if csv_trade_data is not None:
            logger.info('Writing trade data to database')

            if not isinstance(csv_trade_data, dict):
                # Only a dictionary of table name -> file is supported
                logger.error("Specify trade/orders hierarchy")
            else:
                # Trades + orders can each be written to a different database table
                for table_name, csv_path in csv_trade_data.items():
                    self._database_source_trade.convert_csv_to_table(
                        csv_path, None, table_name,
                        database_name=self._trade_data_database_name,
                        if_exists_table=if_exists_trade_table)

        logger.info('Completed writing data to database')

    def create_test_trade_order(self,
                                ticker,
                                start_date='01 Jan 2016',
                                finish_date='01 May 2018',
                                order_min_size=0.5 * constants.MILLION,
                                order_max_size=20.0 * constants.MILLION,
                                number_of_orders_min_per_year=252 * 20,
                                number_of_orders_max_per_year=252 * 200):
        """Create a randomised list of orders & trade using indicative market data as a source (and perturbing the
        execution prices, within various constraints, such as the approximate size of orders trades, the orders per _year

        Parameters
        ----------
        ticker : str
            Ticker

        start_date : str
            Start date of the orders

        finish_date : str
            Finish date of the orders

        order_min_size : float
            Minimum size of orders

        order_max_size : float
            Maximum size of orders

        number_of_orders_min_per_year : int
            Minimum orders per _year

        number_of_orders_max_per_year : int
            Maximum orders per _year

        Returns
        -------
        DataFrame
        """
        logger = LoggerManager.getLogger(__name__)

        if isinstance(ticker, str):
            ticker = [ticker]

        order_list = []
        trade_list = []

        start_date = self.time_series_ops.date_parse(start_date,
                                                     assume_utc=True)
        finish_date = self.time_series_ops.date_parse(finish_date,
                                                      assume_utc=True)
        util_func = UtilFunc()

        # Make this parallel? but may have memory issues
        for tick in ticker:

            logger.info("Loading market data for " + tick)

            # split into yearly chunks (otherwise can run out of memory easily)
            date_list = util_func.split_date_single_list(
                start_date,
                finish_date,
                split_size='yearly',
                add_partial_period_start_finish_dates=True)

            # TODO do in a batch fashion
            for i in range(0, len(date_list) - 1):
                df = self._tca_market.get_market_data(
                    MarketRequest(start_date=date_list[i],
                                  finish_date=date_list[i + 1],
                                  ticker=tick,
                                  data_store=self._market_data_source))

                # self.database_source_market.fetch_market_data(start_date = start_date, finish_date = finish_date, ticker = tick)

                # Need to make sure there's sufficient market data!
                if df is not None:
                    if len(df.index) >= 2:
                        # Get the percentage of the _year represented by the difference between the start and finish dates
                        year_perc = float(
                            (df.index[-1] - df.index[0]).seconds /
                            (24.0 * 60.0 * 60.0)) / 365.0

                        logger.info("Constructing randomised trades for " +
                                    tick)

                        number_of_orders_min = int(
                            year_perc * number_of_orders_min_per_year)
                        number_of_orders_max = int(
                            year_perc * number_of_orders_max_per_year)

                        # Split up the data frame into equally sized chunks
                        df_orders = self._derive_order_no(
                            self._strip_columns(df, tick),
                            number_of_orders_min, number_of_orders_max)

                        # Don't want a memory leak, so delete this as soon possible from memory!
                        del df

                        # order_counter = 0

                        logger.info("Now beginning order construction for " +
                                    tick)

                        # For each order create randomised associated trades
                        # group together all the trades per day as orders
                        for df_order in df_orders:

                            # Set duration of the grandparent order (find randomised start/finish time)
                            # somewhere between 0-25% for start, and 75% to 100% for end point
                            df_order = self.rand_time_series.randomly_truncate_data_frame_within_bounds(
                                df_order, start_perc=0.25, finish_perc=0.75)

                            logger.debug("Creating order between " +
                                         str(df_order.index[0]) + " - " +
                                         str(df_order.index[-1]))

                            # Assume all orders/trades are in the same direction (which is randomly chosen)
                            buy_sell = randint(0, 1)

                            # Sell trades
                            if buy_sell == 0:
                                side_no = -1
                                side = 'bid'

                            # Buy trades
                            else:
                                side_no = 1
                                side = 'ask'

                            magnitude = 10000.0 * 2

                            if tick == 'USDJPY': magnitude = 100.0 * 2.0

                            if randint(0, 100) > 97:
                                new_tick = tick[3:6] + tick[0:3]

                                if 'ticker' in df_order.columns:
                                    df_order['ticker'] = new_tick

                                if 'bid' in df_order.columns and 'ask' in df_order.columns:
                                    ask = 1.0 / df_order['bid']
                                    bid = 1.0 / df_order['ask']

                                    df_order['bid'] = bid
                                    df_order['ask'] = ask

                                df_order['mid'] = 1.0 / df_order['mid']
                            else:
                                new_tick = tick

                            # Get 'bid' for sells, and 'ask' for buys
                            df_order['trade_value'] = df_order[side]

                            # We want to simulate the executions by perturbing the buys randomly
                            df_order = self.rand_time_series.randomly_perturb_column(
                                df_order,
                                column='trade_value',
                                magnitude=magnitude)

                            # Assume notional is in base currency in vast majority of cases
                            if randint(0, 100) > 97:
                                notional_currency = new_tick[3:6]
                            else:
                                notional_currency = new_tick[0:3]

                            notional_multiplier = 1.0

                            if notional_currency == 'JPY':
                                notional_multiplier = 100.0

                            # Randomly choose a realistic order notional
                            # This will later be subdivided into trade notional
                            order_notional = randint(
                                order_min_size * notional_multiplier,
                                order_max_size * notional_multiplier)

                            order_additional_attributes = {
                                'broker_id':
                                constants.test_brokers_dictionary['All'],
                                'broker_sub_id':
                                constants.test_sub_brokers_dictionary['All'],
                                'algo_id':
                                constants.test_algos_dictionary['All'],
                                'algo_settings':
                                'default',
                            }

                            # Construct an order and add it to list
                            ind_order = self._construct_order(
                                df_order,
                                order_type='order',
                                notional=order_notional,
                                notional_currency=notional_currency,
                                side=side_no,
                                tick=new_tick,
                                additional_attributes=
                                order_additional_attributes)

                            order_list.append(ind_order)

                            trade_additional_attributes = self.grab_attributes_from_trade_order(
                                ind_order, [
                                    'broker_id', 'broker_sub_id', 'algo_id',
                                    'algo_settings'
                                ])

                            # Now create all the broker messages for the order

                            # These will consist firstly of placement messages,
                            # then potentially cancels or cancel/replaces, and in most cases randomly assigned trade fills
                            trade_list = self._create_trades_from_order(
                                trade_list=trade_list,
                                df_order=df_order,
                                tick=new_tick,
                                ind_order=ind_order,
                                side_no=side_no,
                                order_notional=order_notional,
                                notional_currency=notional_currency,
                                additional_attributes=
                                trade_additional_attributes)

                            # order_counter = order_counter + 1

        # Aggregate all the lists into DataFrames (setting 'date' as the index)

        # For the trade DataFrame, also drop the 'index' column, which was previously used to ensure that fills came after placements
        trade_order_dict = {
            'trade_df':
            self.time_series_ops.aggregate_dict_to_dataframe(
                trade_list, 'date', 'index'),
            'order_df':
            self.time_series_ops.aggregate_dict_to_dataframe(
                order_list, 'date')
        }

        return trade_order_dict

    def _create_trades_from_order(self,
                                  trade_list=None,
                                  df_order=None,
                                  tick=None,
                                  ind_order=None,
                                  side_no=None,
                                  order_notional=None,
                                  notional_currency=None,
                                  additional_attributes=None):
        """Appends the randomised broker events generated for a single order to trade_list.

        A 'placement' event is always created on the first row of df_order. A random draw between 0 and 1000 then
        selects what follows: two half sized fills on the same row, a cancel/replace followed by a full fill,
        an outright cancel, two fills of different sizes or (in the remaining cases) a single full fill.

        The row positions of the later events are clamped to the last row of df_order, so that a short chunk of
        market data cannot produce an out of range position or an empty random range.

        Parameters
        ----------
        trade_list : list
            Event dictionaries created so far (appended to in place)

        df_order : DataFrame
            Market data covering the order, which construct_trade reads by row position ('bid', 'ask', 'mid' and
            'trade_value' columns)

        tick : str
            Ticker copied into every event

        ind_order : dict
            Order the events point back to

        side_no
            Side copied into every event

        order_notional
            Notional of the order, which is also used as the total notional to be filled

        notional_currency : str
            Currency the notional is expressed in

        additional_attributes : dict
            Extra fields copied into every event

        Returns
        -------
        list
            trade_list, including the newly appended events
        """

        trade_notional = order_notional

        # Highest valid row position in df_order; construct_trade looks rows up by position, so no event may be
        # placed beyond it
        last_row = len(df_order.index) - 1

        def random_row(lower, cap):
            # Draw a row position between lower and cap, clamping both ends to the rows which actually exist
            upper = min(last_row, cap)

            return randint(min(lower, upper), upper)

        # Assume placement at start of order (a placement will have the order notional)
        placement_event = self.construct_trade(
            df_order,
            order_notional=order_notional,
            execution_venue=constants.test_venues_dictionary['All'],
            order=ind_order,
            side=side_no,
            tick=tick,
            event_type='placement',
            notional_currency=notional_currency,
            additional_attributes=additional_attributes)

        trade_list.append(placement_event)

        # Randomly choose an event (cancel/replace + fill, cancel or fill)
        i = randint(0, 1000)

        # Very rare event, same timestamp for a trade, same size too (but different ID)
        if i < 1:
            same_row = min(last_row, 5)

            # First executed trade (half of the notional)
            fill_event = self.construct_trade(
                df_order,
                order=ind_order,
                order_notional=order_notional,
                execution_venue=placement_event['venue'],
                notional_currency=notional_currency,
                executed_notional=int(float(trade_notional) * 0.5),
                side=side_no,
                tick=tick,
                event_type='trade',
                index=same_row,
                additional_attributes=additional_attributes)

            trade_list.append(fill_event)

            # Second executed trade (other half of the notional, on the same row)
            fill_event = self.construct_trade(
                df_order.copy(),
                order=ind_order,
                order_notional=order_notional,
                execution_venue=placement_event['venue'],
                notional_currency=notional_currency,
                executed_notional=int(float(trade_notional) * 0.5),
                side=side_no,
                tick=tick,
                event_type='trade',
                index=same_row,
                additional_attributes=additional_attributes)

            trade_list.append(fill_event)

        elif i < 50:
            # Cancel/replace event (order_notional is not passed, so construct_trade records it as 0)
            cancel_replace_index = random_row(1, 20)

            cancel_replace_event = self.construct_trade(
                df_order,
                order=ind_order,
                execution_venue=placement_event['venue'],
                notional_currency=notional_currency,
                side=side_no,
                tick=tick,
                event_type='cancel/replace',
                index=cancel_replace_index,
                additional_attributes=additional_attributes)

            trade_list.append(cancel_replace_event)

            # The fill is placed after the cancel/replace whenever later rows are available
            fill_event_index = random_row(cancel_replace_index + 1, 50)

            # Executed fill event
            fill_event = self.construct_trade(
                df_order,
                order=ind_order,
                order_notional=order_notional,
                execution_venue=placement_event['venue'],
                executed_notional=trade_notional,
                notional_currency=notional_currency,
                side=side_no,
                tick=tick,
                event_type='trade',
                index=fill_event_index,
                additional_attributes=additional_attributes)

            trade_list.append(fill_event)

        # Rare event, full cancellation of order
        elif i < 60:
            cancel_index = random_row(1, 20)

            cancel_event = self.construct_trade(
                df_order,
                order=ind_order,
                execution_venue=placement_event['venue'],
                executed_notional=0,
                notional_currency=notional_currency,
                side=side_no,
                tick=tick,
                event_type='cancel',
                index=cancel_index,
                additional_attributes=additional_attributes)

            trade_list.append(cancel_event)

        elif i < 80:
            # Where we have two trade fills for a single child order of different sizes
            perc = float(randint(5, 95)) / 100.0

            # First executed trade
            fill_event = self.construct_trade(
                df_order,
                order=ind_order,
                execution_venue=placement_event['venue'],
                notional_currency=notional_currency,
                executed_notional=int(float(trade_notional) * perc),
                side=side_no,
                tick=tick,
                event_type='trade',
                index=random_row(1, 50),
                additional_attributes=additional_attributes)

            trade_list.append(fill_event)

            # Second executed trade for the remainder, never before the first one
            fill_event = self.construct_trade(
                df_order,
                order=ind_order,
                execution_venue=placement_event['venue'],
                notional_currency=notional_currency,
                executed_notional=int(float(trade_notional) * (1.0 - perc)),
                side=side_no,
                tick=tick,
                event_type='trade',
                index=random_row(fill_event['index'], 100),
                additional_attributes=additional_attributes)

            trade_list.append(fill_event)

        # Most common event, single trade/fill
        else:
            # Executed trade
            fill_event = self.construct_trade(
                df_order,
                order=ind_order,
                order_notional=order_notional,
                execution_venue=placement_event['venue'],
                notional_currency=notional_currency,
                executed_notional=trade_notional,
                side=side_no,
                tick=tick,
                event_type='trade',
                index=random_row(1, 50),
                additional_attributes=additional_attributes)

            trade_list.append(fill_event)

        return trade_list

    def _derive_order_no(self, df, orders_min, orders_max):
        """Splits market data into a random number of chunks, always returning them as a list.

        Parameters
        ----------
        df : DataFrame
            Market data to be split up

        orders_min : int
            Lower bound passed to randint when drawing the number of chunks

        orders_max : int
            Upper bound passed to randint when drawing the number of chunks

        Returns
        -------
        list
            The chunks returned by TimeSeriesOps.split_array_chunks (a lone DataFrame is wrapped in a list)
        """
        no_of_chunks = randint(orders_min, orders_max)

        chunks = self.time_series_ops.split_array_chunks(df, chunks=no_of_chunks)

        # A single DataFrame can be handed back instead of a list, so wrap it to keep the return type uniform
        return [chunks] if isinstance(chunks, pd.DataFrame) else chunks

    def _create_unique_trade_id(self, order_type, ticker, datetime_input):
        return order_type + "_" + ticker + str(datetime_input) + "_" + str(
            datetime.datetime.utcnow()) + '_' + str(randint(0, 100000))

    def _construct_order(self,
                         df,
                         order_type=None,
                         notional=None,
                         notional_currency=None,
                         side=None,
                         tick=None,
                         additional_attributes=None,
                         **kwargs):
        """Constructs a dictionary describing a single randomised order.

        The order is dated at the first row of df, with its benchmark window spanning the first to the last row.
        Both the price limit and the arrival price are the 'mid' of the first row. The portfolio, portfolio manager,
        trader and account are randomly sampled from the test dictionaries in constants.

        Parameters
        ----------
        df : DataFrame
            Market data covering the order (needs a 'mid' column)

        order_type : str
            Prefix used when creating the order ID

        notional
            Notional of the order

        notional_currency : str
            Currency the notional is expressed in

        side
            Side of the order

        tick : str
            Ticker of the order

        additional_attributes : dict or list(dict)
            Extra fields to add to the order; a list of dictionaries is merged first (later entries win) and
            any value which is itself a list is replaced by a random sample from it

        kwargs
            Passed to additional_order_processing, together with the order under the 'order' key

        Returns
        -------
        dict
        """

        order = {}

        # For internal purposes
        order['ticker'] = tick
        order['notional'] = notional

        order['notional_currency'] = notional_currency

        order['side'] = side

        order['date'] = df.index[0]
        order['benchmark_date_start'] = df.index[0]
        order['benchmark_date_end'] = df.index[-1]

        # Use explicit positional access: integer keys on a Series which is not integer indexed only work through
        # a deprecated positional fallback
        first_mid = df['mid'].iloc[0]

        order['price_limit'] = first_mid
        order['arrival_price'] = first_mid

        order['portfolio_id'] = self.add_random_sample(
            constants.test_portfolios_dictionary['All'])
        order['portfolio_manager_id'] = self.add_random_sample(
            constants.test_portfolio_managers_dictionary['All'])
        order['trader_id'] = self.add_random_sample(
            constants.test_traders_dictionary['All'])
        order['account_id'] = self.add_random_sample(
            constants.test_accounts_dictionary['All'])

        order['id'] = self._create_unique_trade_id(order_type, tick,
                                                   order['date'])

        kwargs['order'] = order

        order = self.additional_order_processing(**kwargs)

        # Add additional randomized attributes
        if additional_attributes is not None:

            # Merge list of additional attributes
            if isinstance(additional_attributes, list):
                merged_attributes = {}

                for attribute_dict in additional_attributes:
                    merged_attributes.update(attribute_dict)

                additional_attributes = merged_attributes

            for key, value in additional_attributes.items():

                # A list means "choose one of these at random"
                if isinstance(value, list):
                    value = self.add_random_sample(value)

                order[key] = value

        return order

    def additional_order_processing(self, **kwargs):
        """Returns the 'order' entry of kwargs unchanged.

        Presumably intended as an extension point for subclasses which need to amend a newly constructed order
        (to be confirmed against the rest of the project).

        Parameters
        ----------
        kwargs
            Must contain the key 'order'

        Returns
        -------
        The object passed as 'order'
        """
        order = kwargs['order']

        return order

    def construct_trade(self,
                        df,
                        order_notional=None,
                        executed_notional=None,
                        notional_currency=None,
                        execution_venue=None,
                        side=None,
                        order=None,
                        tick=None,
                        event_type=None,
                        additional_attributes=None,
                        index=0):
        """Constructs a dictionary describing a single broker event (placement, cancel, cancel/replace or trade fill).

        The event is dated at row position index of df and records the bid/ask/mid of that row. Only events of type
        'trade' carry an executed notional and an executed price (taken from the 'trade_value' column), otherwise
        both are 0.

        Parameters
        ----------
        df : DataFrame
            Market data covering the order (needs 'bid', 'ask' and 'mid' columns, and 'trade_value' for fills)

        order_notional
            Notional of the parent order (recorded as 0 when None)

        executed_notional
            Notional filled by this event (only used when event_type is 'trade')

        notional_currency : str
            Currency the notional is expressed in

        execution_venue
            Venue of the event (currently replaced by a random sample, see the note in the code)

        side
            Side of the event

        order : dict
            Parent order, from which the pointer ID, price limit and portfolio/trader/account fields are copied

        tick : str
            Ticker of the event

        event_type : str
            Type of the event, eg. 'placement', 'cancel', 'cancel/replace' or 'trade'

        additional_attributes : dict
            Extra fields copied into the event

        index : int
            Row position in df which the event is taken from

        Returns
        -------
        dict
        """

        trade = {}

        if order_notional is None:
            order_notional = 0

        trade['order_notional'] = order_notional
        trade['notional_currency'] = notional_currency
        trade['ticker'] = tick
        trade['side'] = side
        trade['index'] = index
        trade['date'] = df.index[index]

        # index is a row position (as for df.index above), hence we use iloc rather than relying on the deprecated
        # positional fallback of integer keys on a Series
        trade['market_bid'] = df['bid'].iloc[index]
        trade['market_ask'] = df['ask'].iloc[index]
        trade['market_mid'] = df['mid'].iloc[index]
        trade['price_limit'] = df['mid'].iloc[index]

        trade['event_type'] = event_type

        trade['executed_price'] = 0
        trade['venue'] = execution_venue
        trade['executed_notional'] = 0

        if event_type == 'trade':
            trade['executed_notional'] = executed_notional
            trade['executed_price'] = df['trade_value'].iloc[index]

        # NOTE(review): this always overwrites execution_venue with a random venue, so fills do not inherit the venue
        # of their placement, even though callers pass it in - confirm whether that is intended
        trade['venue'] = self.add_random_sample(
            constants.test_venues_dictionary['All'])

        if order is not None:
            trade[constants.order_name + '_pointer_id'] = order['id']
            trade['price_limit'] = order['price_limit']

            trade['portfolio_id'] = order['portfolio_id']
            trade['portfolio_manager_id'] = order['portfolio_manager_id']
            trade['trader_id'] = order['trader_id']
            trade['account_id'] = order['account_id']

        trade['id'] = self._create_unique_trade_id('execution', tick,
                                                   trade['date'])

        if additional_attributes is not None:
            for key, value in additional_attributes.items():
                trade[key] = value

        return trade

    def add_random_sample(self, lst):
        """Returns one element of lst, at a position drawn with randint(0, len(lst) - 1).

        Parameters
        ----------
        lst : list
            Candidates to choose from

        Returns
        -------
        The selected element
        """
        last_position = len(lst) - 1
        chosen_position = randint(0, last_position)

        return lst[chosen_position]

    def grab_attributes_from_trade_order(self, trade_order, attributes):
        """Extracts the selected fields of a trade/order into a new dictionary.

        Parameters
        ----------
        trade_order : dict
            Trade/order to read the fields from

        attributes : list(str)
            Names of the fields to copy (each must be present in trade_order, otherwise a KeyError is raised)

        Returns
        -------
        dict
            The selected fields, in the order given by attributes
        """

        return {attribute: trade_order[attribute] for attribute in attributes}

    def _strip_columns(self, df, tick):

        # filter market data so only includes specific asset (Arctic won't have this) and during "main" FX hours
        # exclude any Saturday data
        if 'ticker' in df.columns:
            df = df[(df.index.hour >= 6) & (df.index.hour < 21) &
                    (df.index.dayofweek != 5) & (df['ticker'] == tick)]
        else:
            df = df[(df.index.hour >= 6) & (df.index.hour < 21) &
                    (df.index.dayofweek != 5)]

        keep_cols = ['bid', 'ask', 'mid']

        remove_cols = []

        for k in df.columns:
            if k not in keep_cols:
                remove_cols.append(k)

        if remove_cols != []:
            df.drop(remove_cols, inplace=True, axis=1)

        # Ensure that the market is data is properly sorted
        df.sort_index(inplace=True)

        # Calculate mid price (if it doesn't exist)
        if 'mid' not in df.columns:
            df['mid'] = (df['bid'].values + df['ask'].values) / 2.0

        # Create synthetic bid/ask if they don't exist
        if 'bid' not in df.columns:
            df['bid'] = 0.9995 * df['mid'].values

        if 'ask' not in df.columns:
            df['ask'] = 1.0005 * df['mid'].values

        # First strip away out of hours times
        # remove any trades before 6am and after 9pm GMT
        return df
Пример #13
0
def test_write_multiple_wildcard_market_data_csvs_arctic():
    """Tests we can write sequential market data files (CSV or Parquet), whose path is specified with a wildcard
    (eg. EURUSD*.csv), to Arctic. It is assumed that the filenames are in chronological order.

    The original CSV is split into several smaller files, these are loaded into Arctic via the wildcard path,
    and the 'mid' read back from Arctic is compared with the 'mid' read directly from the original CSV.
    """
    if not run_arctic_tests:
        return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # Arguments which are common to every conversion into Arctic below
    shared_convert_kwargs = dict(if_exists_ticker='replace',
                                 market_trade_data='market',
                                 csv_read_chunksize=10**6,
                                 remove_duplicates=False)

    for lib_type in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness',
                                               arctic_lib_type=lib_type)

        ### Read CSV data which is sorted ascending (default!)
        database_source.convert_csv_to_table(
            csv_market_data_store,
            ticker,
            test_harness_arctic_market_data_table,
            if_exists_table='replace',
            **shared_convert_kwargs)

        # Reference data, read directly from the original large CSV
        full_csv_source = DatabaseSourceCSV(
            market_data_database_csv=csv_market_data_store)

        market_df_csv = full_csv_source.fetch_market_data(
            start_date=arctic_start_date,
            finish_date=arctic_finish_date,
            ticker=ticker)

        # Prepare the (emptied) test harness folder, where we shall dump the mini files
        csv_folder = os.path.join(constants.test_data_harness_folder,
                                  'csv_arctic_mult')

        UtilFunc().forcibly_create_empty_folder(csv_folder)

        # Split the market data into several mini CSV files (and also Parquet files)
        market_df_list = TimeSeriesOps().split_array_chunks(market_df_csv,
                                                            chunks=3)

        for chunk_no, chunk_df in enumerate(market_df_list):
            chunk_stem = os.path.join(csv_folder, "EURUSD" + str(chunk_no))

            chunk_df.to_csv(chunk_stem + '.csv')
            UtilFunc().write_dataframe_to_binary(chunk_df,
                                                 chunk_stem + '.parquet',
                                                 format='parquet')

        for f in ('csv', 'parquet'):
            ### Read data from the mini files (using wildcard char) and dump to Arctic
            database_source.convert_csv_to_table(
                os.path.join(csv_folder, "EURUSD*." + f),
                ticker,
                test_harness_arctic_market_data_table,
                if_exists_table='append',
                **shared_convert_kwargs)

            market_request = MarketRequest(
                start_date=arctic_start_date,
                finish_date=arctic_finish_date,
                ticker=ticker,
                data_store=database_source,
                market_data_database_table=test_harness_arctic_market_data_table
            )

            # Read back from Arctic
            market_df_load = market_loader.get_market_data(
                market_request=market_request)

            # Compare reading directly from the original large CSV vs. reading back from Arctic (which was dumped
            # from the split files)
            mid_difference = (market_df_load['mid'] - market_df_csv['mid']).abs()

            outside_bounds = mid_difference[mid_difference >= eps]

            assert len(outside_bounds) == 0
        csv_file = [x + '_' + data_vendor + '_20*.' + file_extension for x in
                    ticker_mkt]  # assume that ALL TIME IN UTC!

        date_format = None
        read_in_reverse = False
        remove_duplicates = False

    ####################################################################################################################

    # Load market data
    data_source_csv = DatabaseSourceCSV()

    # Create market data store for database and associated data vendor
    if market_data_store == 'arctic':
        database_source = DatabaseSourceArctic(postfix=data_vendor)
        market_data_database_table = constants.arctic_market_data_database_table

    if market_data_store == 'pystore':
        database_source = DatabaseSourcePyStore(postfix=data_vendor)
        market_data_database_table = constants.pystore_market_data_database_table

    if market_data_store == 'influxdb':
        database_source = DatabaseSourceInfluxDB(postfix=data_vendor)
        market_data_database_table = constants.influxdb_market_data_database_table

    if market_data_store == 'kdb':
        database_source = DatabaseSourceKDB(postfix=data_vendor)
        market_data_database_table = constants.kdb_market_data_database_table

    if csv_folder is None:
Пример #15
0
    # Warning for high frequency data file sizes might be very big, so you may need to reduce this!
    start_date = '01 Jan 2016'
    finish_date = '07 Apr 2020'

    tickers = [
        'EURUSD', 'GBPUSD', 'AUDUSD', 'NZDUSD', 'USDCAD', 'USDCHF', 'EURNOK',
        'EURSEK', 'USDJPY'
    ]

    if database_dialect == 'arctic':
        from tcapy.data.databasesource import DatabaseSourceArctic as DatabaseSource
    elif database_dialect == 'pystore':
        from tcapy.data.databasesource import DatabaseSourcePyStore as DatabaseSource

    database_source = DatabaseSource(postfix=data_vendor)

    file_format = 'parquet'

    for t in tickers:
        market_df = database_source.fetch_market_data(start_date=start_date,
                                                      finish_date=finish_date,
                                                      ticker=t)

        key = '_' + data_vendor + "_" + \
              (str(market_df.index[0]) + str(market_df.index[-1])).replace(":", '_').replace(" ", '_')
        filename = os.path.join(folder, t + key) + '.' + file_format

        if market_df is not None:
            c = market_df.columns
            print('Writing ' + t + ' to ' + filename)
Пример #16
0
        date_format = None

        read_in_reverse = False

        remove_duplicates = True

        ####################################################################################################################

    # Load market data
    data_source_csv = DatabaseSourceCSV()

    market_data_store = 'arctic'

    if market_data_store == 'arctic':
        database_source = DatabaseSourceArctic(postfix=source)
        market_data_database_table = constants.arctic_market_data_database_table

    if folder is None:
        folder = Constants().test_data_folder

    # This relies on you having market data already stored in H5/CSV files (eg. by downloading from DukasCopy)
    # note: whilst free FX data can be used for testing (in particular for generating randomised trades),
    # we recommend using higher quality data for actual benchmark

    csv_market_data = [os.path.join(folder, x) for x in csv_file]

    # for each ticker, read in the H5/CSV file and then dump into tick database
    for i in range(0, len(ticker_mkt)):
        ticker = ticker_mkt[i]
        csv_file = csv_market_data[i]
Пример #17
0
    csv_marker = 'small_test'
    start_date_trade_generation = '01 Apr 2017'
    finish_date_trade_generation = '05 Jun 2017'

    # csv_marker = 'large_test'
    # start_date_trade_generation = '01 Apr 2016'; finish_date_trade_generation = '31 Mar 2020'

    # 'mysql' or 'ms_sql_server'
    sql_trade_database_type = 'mysql'

    data_test_creator = DataTestCreator(
        market_data_postfix=data_source,
        sql_trade_database_type=sql_trade_database_type)

    # Use database source as Arctic for market data and SQL Server for trade/order data
    data_test_creator._database_source_market = DatabaseSourceArctic(
        postfix=data_source)

    # data_test_creator.fetch_test_database()

    # Load up market data from CSV and dump into SQL database
    ticker = [
        'EURUSD', 'GBPUSD', 'AUDUSD', 'NZDUSD', 'USDCAD', 'USDCHF', 'USDNOK',
        'USDSEK', 'EURNOK', 'EURSEK', 'USDTRY', 'USDJPY'
    ]

    # Generate trades/orders for these _tickers
    ticker_trade = [
        'EURUSD', 'GBPUSD', 'AUDUSD', 'NZDUSD', 'USDCAD', 'USDCHF', 'EURNOK',
        'EURSEK', 'USDJPY', 'AUDJPY', 'NZDCAD', 'EURJPY'
    ]
Пример #18
0
    def upload_market_data_flat_file(self, data_vendor='dukascopy', market_data_store='arctic', server_host=None,
                                     server_port=None,
                                     ticker_mkt=['EURUSD', 'GBPUSD', 'AUDUSD', 'NZDUSD', 'USDCAD', 'USDCHF',
                                                 'EURNOK', 'EURSEK', 'USDJPY',
                                                 'USDNOK', 'USDSEK', 'EURJPY',
                                                 'USDMXN', 'USDTRY', 'USDZAR', 'EURPLN'],
                                     csv_folder=None,
                                     if_exists_table='replace',
                                     if_append_replace_ticker='replace',
                                     file_extension='parquet',
                                     read_in_reverse=False, remove_duplicates=False, date_format=None,
                                     plot_back_data=False, access_control=AccessControl(), market_data_database_table=None):
        """Uploads market data stored in flat files (one wildcard path per ticker) into a market data store.

        For every ticker, the files matching <ticker>_<data_vendor>_20*.<file_extension> in csv_folder are written
        to the chosen database via convert_csv_to_table. Optionally, the data is then read back, resampled to
        5 minute means and plotted, to sanity check it.

        Parameters
        ----------
        data_vendor : str
            Data vendor, used both in the filenames and as the postfix of the database source

        market_data_store : str
            'arctic', 'pystore', 'influxdb' or 'kdb'

        server_host : str
            Host of the database (by default taken from constants for the chosen store, not used by 'pystore')

        server_port
            Port of the database (by default taken from constants for the chosen store, not used by 'pystore')

        ticker_mkt : list(str)
            Tickers to upload (the default list is shared between calls, but is only read here)

        csv_folder : str
            Folder containing the flat files (by default constants.test_data_folder)

        if_exists_table : str
            Passed to convert_csv_to_table; when 'replace', only the first ticker replaces the table and later
            tickers are appended to it

        if_append_replace_ticker : str
            Passed to convert_csv_to_table as if_exists_ticker

        file_extension : str
            Extension of the flat files

        read_in_reverse : bool
            Passed to convert_csv_to_table

        remove_duplicates : bool
            Passed to convert_csv_to_table

        date_format : str
            Passed to convert_csv_to_table

        plot_back_data : bool
            Should the uploaded data be read back and plotted (requires chartpy)?

        access_control : AccessControl
            Holds the database usernames/passwords (the default instance is created once, when the method is defined)

        market_data_database_table : str
            Table to write to (by default taken from constants for the chosen store)

        Raises
        ------
        ValueError
            If market_data_store is not one of the supported stores

        Returns
        -------

        """

        logger = LoggerManager.getLogger(__name__)

        # Files dumped by DatabasePopulator look like this
        ## 'AUDUSD_dukascopy_2016-01-03_22_00_01.868000+00_002016-01-31_23_59_57.193000+00_00.parquet'
        csv_file = [x + '_' + data_vendor + '_20*.' + file_extension for x in
                    ticker_mkt]  # assume that ALL TIME IN UTC!

        ####################################################################################################################

        # Load market data

        # Create market data store for database and associated data vendor
        if market_data_store == 'arctic':
            if server_host is None:
                server_host = constants.arctic_host

            if server_port is None:
                server_port = constants.arctic_port

            database_source = DatabaseSourceArctic(postfix=data_vendor,
                                                   username=access_control.arctic_username,
                                                   password=access_control.arctic_password,
                                                   server_host=server_host, server_port=server_port)

            if market_data_database_table is None:
                market_data_database_table = constants.arctic_market_data_database_table

        elif market_data_store == 'pystore':
            database_source = DatabaseSourcePyStore(postfix=data_vendor)

            if market_data_database_table is None:
                market_data_database_table = constants.pystore_market_data_database_table

        elif market_data_store == 'influxdb':
            if server_host is None:
                server_host = constants.influxdb_host

            if server_port is None:
                server_port = constants.influxdb_port

            database_source = DatabaseSourceInfluxDB(postfix=data_vendor,
                                                     username=access_control.influxdb_username,
                                                     password=access_control.influxdb_password,
                                                     server_host=server_host, server_port=server_port)

            if market_data_database_table is None:
                market_data_database_table = constants.influxdb_market_data_database_table

        elif market_data_store == 'kdb':
            if server_host is None:
                server_host = constants.kdb_host

            if server_port is None:
                server_port = constants.kdb_port

            database_source = DatabaseSourceKDB(postfix=data_vendor,
                                                username=access_control.kdb_username,
                                                password=access_control.kdb_password,
                                                server_host=server_host, server_port=server_port)

            if market_data_database_table is None:
                market_data_database_table = constants.kdb_market_data_database_table

        else:
            # Fail early with a clear message, rather than with an unbound database_source further down
            raise ValueError("Unsupported market_data_store: " + str(market_data_store))

        if csv_folder is None:
            csv_folder = constants.test_data_folder

        # This relies on you having market data already stored in Parquet/H5/CSV files (eg. by downloading from DukasCopy)
        # note: whilst free FX data can be used for testing (in particular for generating randomised trades),
        # you may wish to choose other high quality, high frequency data for actual benchmarks

        csv_market_data = [os.path.join(csv_folder, x) for x in csv_file]

        # For each ticker, read in the Parquet/H5/CSV file and then dump into tick database
        # Potentially, we can thread this?
        for i, (ticker, ticker_file_path) in enumerate(zip(ticker_mkt, csv_market_data)):

            # From the second time through the loop, we make sure to append to table
            # otherwise will keep overwriting!
            if i >= 1 and if_exists_table == 'replace':
                if_exists_table = 'append'

            database_source.convert_csv_to_table(ticker_file_path, ticker, market_data_database_table,
                                                 if_exists_table=if_exists_table,
                                                 remove_duplicates=remove_duplicates,
                                                 if_exists_ticker=if_append_replace_ticker, date_format=date_format,
                                                 read_in_reverse=read_in_reverse)

            # It is worth plotting the data to check validity sometimes (make sure you choose appropriate start/finish
            # dates, loading a *very* large tick history into memory will result in your computer running out of memory)
            if plot_back_data:
                # Imported here so that chartpy is only needed when plotting is requested
                from chartpy import Chart

                import datetime
                import pandas as pd

                df = database_source.fetch_market_data(start_date='01 Jan 2000',
                                                       finish_date=datetime.datetime.utcnow(),
                                                       ticker=ticker)

                df = pd.DataFrame(df.resample('5min').mean())

                if 'mid' not in df.columns:
                    df['mid'] = (df['bid'] + df['ask']) / 2.0

                df = pd.DataFrame(df['mid'])

                Chart(engine='plotly').plot(df)

                print(df)

        logger.info("Finished uploading data to " + market_data_store)