def test_new():
    """new() must hand back a fresh instance of the same container class."""
    GIVEN("a data container")
    original = DataContainer()
    WHEN("we call new")
    clone = original.new()
    THEN("a data container is returned")
    assert type(clone) is type(original)
    THEN("the new data container is a different instance")
    assert clone is not original
def test_sum_columns():
    """sum_columns should add the named columns element-wise into a new
    output column, coercing not-a-number entries to 0 via
    not_a_number_to_number."""
    GIVEN("a simple set of data and a container")
    data = __get_test_dict()
    data_container = DataContainer(data)
    # Message fixed: the code sums columns A and C (not A and B).
    WHEN("we sum columns A and C to give D")
    data_container.sum_columns(output="D", columns=["A", "C"])
    THEN("the resulting data is correct")
    col_d = data_container.get_column(name="D")
    # Check every row: D[row] == num(A[row]) + num(C[row]).
    for row, (a_value, c_value) in enumerate(zip(data.get("A"), data.get("C"))):
        assert col_d[row] == (not_a_number_to_number(a_value) +
                              not_a_number_to_number(c_value))
# Example 3 (extraction-artifact marker "示例#3" converted to a comment)
def test_get_ids_for_model_data():
    """Ids whose probabilities have been fixed must be omitted from the ids
    returned for the next model run."""
    # Stale duplicate GIVEN removed: the "two fixed probabilities" only come
    # into existence in the WHEN step below, so narrating them up front was
    # contradictory.
    GIVEN("a data handler and the directory and file name of a test file")
    directory = "./data/29184567"
    file_name = "1.156230797.txt"
    file = HistoricalExternalAPIFileHander(directory=directory, file=file_name)
    record = file.get_file_as_list()[0]
    market_start_time = file.get_market_start_time()

    adapter = ExternalAPIMarketRecordAdapter(
        market_start_time=market_start_time)
    mediator = MockMediator()

    handler = DataHandler(
        mediator=mediator,
        adapter=adapter,
        container=DataContainer(),
    )
    handler.process_data(record)

    WHEN(
        "we set the probabilities of two items and get the ids required for the next model run"
    )
    ids = handler.get_unique_ids()
    # Fix the first two ids and drop them from our local expectation list;
    # iterating a slice keeps the pops safe.
    for runner_id in ids[0:2]:
        handler._set_probability(runner_id=runner_id, probability=0.1)
        ids.pop(0)

    THEN("the list omits the items which have fixed probabilities")
    model_ids = handler._get_ids_for_model_data()
    assert model_ids == ids
    assert len(model_ids) < len(handler.get_unique_ids())
def test_get_last_column_entry():
    """get_last_column_entry returns the final value stored in a column."""
    GIVEN("a simple set of data and a container")
    source = __get_test_dict()
    container = DataContainer(source)

    WHEN("we get the last entry for A")
    last_a = container.get_last_column_entry("A")
    THEN("the correct value is returned")
    assert last_a == source.get("A")[-1]
    assert isinstance(last_a, float)

    WHEN("we get the last entry for B")
    last_b = container.get_last_column_entry("B")
    THEN("the correct value is returned")
    assert last_b == source.get("B")[-1]
    assert isinstance(last_b, str)

    WHEN("we get the last entry for C")
    last_c = container.get_last_column_entry("C")
    THEN("the correct value is returned")
    assert is_not_a_number(last_c)
def test_add_rows():
    """add_rows appends another container's rows and unions the column sets."""
    GIVEN("two data containers containing simple data")
    base_data = __get_test_dict()
    base = DataContainer(base_data)
    extra_data = {"B": [12], "A": [6], "D": ["extra column"]}
    extra = DataContainer(extra_data)
    WHEN("we add the first to the second")
    base.add_rows(extra)
    THEN("we have the correct number of rows and columns")
    all_keys = set(base_data) | set(extra_data)
    # The combined row count is driven by whichever column ends up longest.
    expected_rows = max(
        len(base_data.get(key) or []) + len(extra_data.get(key) or [])
        for key in all_keys
    )
    assert base.get_row_count() == expected_rows
    assert base.get_column_count() == len(all_keys)
# Example 6 (extraction-artifact marker "示例#6" converted to a comment)
    def __init__(
        self,
        market_id,
        external_api,
        market_start_time,
        data_adapter=None,
        bank=5000,
        data=None,
        models=None,
        orders=None,
    ):
        """Wire up this mediator's colleagues for a single market.

        Args:
            market_id: identifier of the market being coordinated.
            external_api: colleague used to reach the external API;
                must expose a ``post_order`` callable.
            market_start_time: start time fed to the default data adapter.
            data_adapter: optional record adapter; defaults to an
                ``ExternalAPIMarketRecordAdapter`` built from
                ``market_start_time``.
            bank: starting bank forwarded to the default ``OrdersHandler``.
            data: optional pre-built data colleague; defaults to a
                ``DataHandler`` mediated by ``self``.
            models: optional pre-built model colleague; defaults to a
                ``ModelHandler`` with a weighted linear regression model.
            orders: optional pre-built orders colleague; defaults to an
                ``OrdersHandler`` mediated by ``self``.
        """

        self.__market_id = market_id

        self.external_api: Colleague = external_api

        # Only build the default adapter when the caller did not supply one.
        adapter = data_adapter or ExternalAPIMarketRecordAdapter(
            market_start_time=market_start_time)
        self.data: Colleague = data or DataHandler(
            mediator=self,
            adapter=adapter,
            container=DataContainer(),
        )

        self.models: Colleague = models or ModelHandler(
            mediator=self, wls_model=WeightedLinearRegression())

        self.orders: Colleague = orders or OrdersHandler(mediator=self,
                                                         bank=bank)

        # Event-name -> handler routing table; presumably consumed by a
        # notify/dispatch method defined elsewhere in this class (outside
        # this view) — TODO confirm.
        self.__recipients = {
            "external data fetched": self.data.process_data,
            "data added to container": self.models.run_models,
            "models have results": self.orders.get_new_orders,
            "new orders": self.external_api.post_order,
            "orders posted": self.__delegate_posted_orders,
            "market closed": self.__exit,
            "no data provided multiple times": self.__exit,
            "finished processing": self.__finished,
        }
# Example 7 (extraction-artifact marker "示例#7" converted to a comment)
def test_confirm_market_closed():
    """_confirm_market_closed tracks the ('closed_indicator', '') column."""
    GIVEN("a data handler and the directory and file name of a test file")
    adapter = ExternalAPIMarketRecordAdapter(
        market_start_time="2019-01-01T00:00:00.000Z")
    mediator = MockMediator()

    handler = DataHandler(mediator=mediator,
                          adapter=adapter,
                          container=DataContainer())

    WHEN("we check if the market is closed")
    is_closed = handler._confirm_market_closed()
    THEN("it is not")
    assert not is_closed

    GIVEN("the handler's container has the required column" +
          " but it does not indicate that the market is closed")
    open_row = handler._container.new(data={("closed_indicator", ""): [0]})
    handler._container.add_rows(container=open_row)

    WHEN("we check if the market is closed")
    is_closed = handler._confirm_market_closed()
    THEN("it is not")
    assert not is_closed

    GIVEN(
        "the handler's container has the required column indicating that the market is closed"
    )
    closed_row = handler._container.new(data={("closed_indicator", ""): [1]})
    handler._container.add_rows(container=closed_row)

    WHEN("we check if the market is closed")
    is_closed = handler._confirm_market_closed()
    THEN("it is")
    assert is_closed
def test_has_column():
    """has_column answers membership for plain and tuple column names."""
    GIVEN("a simple set of data and a container")
    source = __get_test_dict()
    container = DataContainer(source)

    WHEN("we check if the container has the column A")
    found = container.has_column("A")
    THEN("it does")
    assert found

    WHEN("we check if the container has the column wwwwweeeeeeeeeeeee")
    found = container.has_column("wwwwweeeeeeeeeeeee")
    THEN("it does not")
    assert not found

    GIVEN("an empty container")
    container = DataContainer()

    WHEN("we check if the container has the column ('closed_indicator','')")
    found = container.has_column(("closed_indicator", ""))
    THEN("it does not")
    assert not found
# Example 9 (extraction-artifact marker "示例#9" converted to a comment)
def test_removed_runner():
    """A file containing a removed runner should still process one record at a
    time with consistent row, column, and runner counts."""
    GIVEN(
        "the directory and file name of a test file which contains a removed runner"
    )
    directory = "./data/29201704"
    file_name = "1.156695742.txt"
    file = HistoricalExternalAPIFileHander(directory=directory, file=file_name)
    file_data = file.get_file_as_list()
    market_start_time = file.get_market_start_time()
    adapter = ExternalAPIMarketRecordAdapter(
        market_start_time=market_start_time)
    number_runners = __get_number_runners(data=file_data)
    mediator = MockMediator()

    WHEN("we feed the data into a handler one record at a time")

    handler = DataHandler(
        mediator=mediator,
        adapter=adapter,
        container=DataContainer(),
    )
    for i, record in enumerate(file_data):
        handler.process_data(record)
        THEN("the incoming record was processed")
        number_records_processed = i + 1
        # Message fixed: was missing the verb ("the data container the ...").
        THEN("the data container has the correct number of records")
        assert handler._container.get_row_count() == number_records_processed

    WHEN("we have finished")
    THEN("the data container has the correct number of columns")
    assert handler._container.get_column_count() == __get_number_columns(
        number_runners)
    THEN("the data container has the same number of records as the raw data")
    assert handler._container.get_row_count() == len(file_data)
    THEN("the correct number of runners are contained in the object")
    assert len(handler.get_unique_ids()) == number_runners
# Example 10 (extraction-artifact marker "示例#10" converted to a comment)
def test_fixed_probability(mock_notify):
    """Feed a real data file into a DataHandler one record at a time,
    periodically fixing an item's probability, and verify that:
    fixed items are excluded from the model data, the remaining sp/ex
    probabilities sum to the unfixed share, and the time-series columns
    grow one entry per processed record.

    Fix applied: the final loop over the combined-back-size time series
    reused the name ``combined_back_size`` as its loop variable, clobbering
    the point-in-time value asserted just above it; it now uses a distinct
    name.
    """
    GIVEN("a data handler and the directory and file name of a test file")

    directory = "./data/29451865"
    file_name = "1.162069495.txt"
    file = HistoricalExternalAPIFileHander(directory=directory, file=file_name)
    file_data = file.get_file_as_list()
    market_start_time = file.get_market_start_time()

    number_runners = __get_number_runners(data=file_data)
    unfixed_items = number_runners
    fixed_items = 0
    adapter = ExternalAPIMarketRecordAdapter(
        market_start_time=market_start_time)
    pricer = PriceHandler()
    metadata = MetadataHandler()
    mediator = MockMediator()
    correct_probability = 1

    number_records_processed = 0

    WHEN("we feed the data into a handler one record at a time")
    handler = DataHandler(
        mediator=mediator,
        adapter=adapter,
        container=DataContainer(),
    )
    for i, record in enumerate(file_data):
        number_records_processed = i + 1
        # Every tenth record: fix the probability of the first available item
        # at its current compositional sp probability.
        if number_records_processed % 10 == 0:
            WHEN("we randomly fix the probability of an item")
            id_to_fix = handler._get_ids_for_model_data()[0]
            fixed_probability = round(
                handler._container.get_last_column_entry(
                    name=("compositional_sp_probability", id_to_fix)),
                4,
            )
            handler._set_probability(runner_id=id_to_fix,
                                     probability=fixed_probability)
            correct_probability -= fixed_probability
            unfixed_items -= 1
            fixed_items += 1

        fixed_probability_ids = handler._get_fixed_probability_ids()
        THEN("the list of fixed probability ids is the correct length")
        assert len(fixed_probability_ids) == fixed_items

        handler.process_data(record)

        THEN("the handler's data has the correct number of records")
        assert handler._container.get_row_count() == number_records_processed

        THEN(
            "the mediator's notify method was called with the correct parameters"
        )
        model_data = handler._get_model_data()
        args, kwargs = mock_notify.call_args
        assert args == ()
        assert kwargs.get("data") == model_data
        assert kwargs.get("event") == "data added to container"

        THEN(
            "there is a record in the model data for each of the unfixed items"
        )
        assert len(model_data) == unfixed_items

        # Index the adapter's view of this record by runner id for lookups.
        test_record = {
            each.get("id"): each
            for each in adapter.convert(record).get("items")
        }
        total_sp_probability = 0
        total_ex_probability = 0

        for data in model_data:
            THEN("each of the items in the model data has an non-zero id")
            runner_id = data.get("id")
            assert isinstance(runner_id, int)
            assert runner_id > 0

            THEN("the items probability has not been fixed")
            assert runner_id not in fixed_probability_ids

            test_item = test_record.get(runner_id)

            THEN("the data has the correct combined_back_size")
            combined_back_size = data.get("combined_back_size" +
                                          metadata.get_point_in_time_suffix())
            assert combined_back_size == (test_item.get("sp_back_size") +
                                          test_item.get("ex_back_size"))

            THEN(
                "the data contains the compositional sp probability which is between 0 and 1"
            )
            compositional_sp_probability = data.get(
                "compositional_sp_probability" +
                metadata.get_point_in_time_suffix())
            total_sp_probability += compositional_sp_probability
            assert 1 > compositional_sp_probability > 0

            THEN(
                "the data contains the compositional ex probability which is between 0 and 1"
            )
            compositional_ex_average_probability = data.get(
                "compositional_ex_average_probability" +
                metadata.get_point_in_time_suffix())
            total_ex_probability += compositional_ex_average_probability
            assert 1 > compositional_ex_average_probability > 0

            THEN("the data contains the correct offered price")
            offered_price = data.get("ex_offered_back_price" +
                                     metadata.get_point_in_time_suffix())
            assert offered_price > 0
            assert offered_price == test_item.get("ex_offered_back_price")

            THEN("the data contains the correct returns price")
            returns_price = data.get("ex_offered_back_price_mc" +
                                     metadata.get_point_in_time_suffix())
            assert returns_price > 0
            assert returns_price == pricer.remove_commission(
                test_item.get("ex_offered_back_price"))

            THEN(
                "the sp back price time series data returned is of the correct length"
            )
            compositional_sp_back_price_ts = (
                data.get("compositional_sp_back_price" +
                         metadata.get_time_series_suffix()) or [])
            assert len(
                compositional_sp_back_price_ts) == number_records_processed
            THEN(
                "the last record of the time series data matches the probability"
            )
            assert almost_equal(compositional_sp_back_price_ts[-1],
                                1 / compositional_sp_probability)

            THEN(
                "the extract time time series data returned is of the correct length"
            )
            extract_time_ts = (data.get("extract_time" +
                                        metadata.get_time_series_suffix())
                               or [])
            assert len(extract_time_ts) == number_records_processed
            for j, extract_time in enumerate(extract_time_ts):
                if j > 0:
                    THEN("the times in the series are ascending")
                    assert extract_time > extract_time_ts[j - 1]

            THEN(
                "the combined back size time series data returned is of the correct length"
            )
            combined_back_size_ts = (
                data.get("combined_back_size" +
                         metadata.get_time_series_suffix()) or [])
            assert len(combined_back_size_ts) == number_records_processed
            THEN(
                "the last entry in the time series is the same as point in time combined_back_size"
            )
            assert combined_back_size_ts[-1] == combined_back_size
            # Distinct loop variable so the point-in-time combined_back_size
            # checked just above is not clobbered.
            for j, size_at_j in enumerate(combined_back_size_ts):
                if j > 0:
                    THEN("the sizes in the series are ascending")
                    assert size_at_j >= combined_back_size_ts[j - 1]

        THEN("the total ex and sp probabilities from the model_data sum to 1")
        assert almost_equal(total_sp_probability, correct_probability)
        assert almost_equal(total_ex_probability, correct_probability)

    WHEN("we have finished")
    THEN("the data container has the correct number of columns")
    assert handler._container.get_column_count() == __get_number_columns(
        number_runners)
    THEN("the data container has the same number of records as the raw data")
    assert handler._container.get_row_count() == len(file_data)
    THEN("the correct number of runners are contained in the object")
    assert len(handler.get_unique_ids()) == number_runners
    THEN(
        "the correct number of fixed probabilities are contained in the object"
    )
    assert len(handler._get_fixed_probability_ids()) == round_down(
        number_records_processed / 10)
def test_column_group_name():
    """Column group names expose the distinct key values at a given level."""
    GIVEN("some data that contains ids in the keys and" +
          " a container with the column group's name of the ids to be id")
    # Same (column, id) -> values mapping as before, built via a
    # comprehension that preserves the original insertion order.
    data = {
        (column, runner): [1, 2, 3, 4]
        for column in ("col1", "col2")
        for runner in (123, 456)
    }
    container = DataContainer(data)
    container.set_column_group_name(name="id", level=1)
    WHEN("we get the column names from the id group")
    ids = container.get_column_group_values(name="id")
    THEN("the correct ids are returned")
    assert lists_are_equal(ids, [123, 456])

    GIVEN("some data that contains ids in the keys and a" +
          " container with the column group's name of the ids to be id")
    data = {
        (column, runner): [1, 2, 3, 4]
        for column in ("col1", "col2")
        for runner in (123, 456)
    }
    container = DataContainer(data)
    container.set_column_group_name(names=["variable", "id"])
    WHEN("we get the column names from the id group")
    ids = container.get_column_group_values(name="id")
    THEN("the correct ids are returned")
    assert lists_are_equal(ids, [123, 456])
    WHEN("we get the column names from the variable group")
    variables = container.get_column_group_values(name="variable")
    THEN("the correct variables are returned")
    assert lists_are_equal(variables, ["col1", "col2"])

    GIVEN("some simple data and a container with the column name set to vars")
    data = {"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}
    container = DataContainer(data)
    container.set_column_group_name(name="vars")
    WHEN("we get the columns from the id group ")
    columns = container.get_column_group_values(name="vars")
    THEN("the correct columns are returned")
    assert lists_are_equal(columns, ["col1", "col2"])
def test_set_index():
    """set_index consumes the named columns and exposes their values."""
    GIVEN("a simple set of data and a container")
    source = __get_test_dict()
    container = DataContainer(source)
    WHEN("we set the index to be A")
    container.set_index(columns=["A"])
    # One column was consumed by the index.
    assert container.get_column_count() == len(source) - 1
    assert lists_are_equal(container.get_index(), source.get("A"))

    GIVEN("a simple set of data and a container")
    source = __get_test_dict()
    container = DataContainer(source)
    WHEN("we set the index to be A and B")
    container.set_index(columns=["A", "B"])
    # Two columns were consumed by the multi-level index.
    assert container.get_column_count() == len(source) - 2
    # The index should be the row-wise (A, B) pairs.
    assert lists_are_equal(container.get_index(),
                           list(zip(source.get("A"), source.get("B"))))