Exemplo n.º 1
0
def test_massive_managing_2(queue_name, execution_nb, columns,
                            dataframe_max_size, chunk_size):
    """Add random rows chunk by chunk and check the dataframe stays capped.

    Rows are inserted through a local function wrapped with ``adding`` and
    ``managing``; after every chunk the dataframe length must equal the
    number of rows inserted so far, capped at ``dataframe_max_size``.

    :param queue_name: name of the queue the dataframe is assigned to
    :param execution_nb: total number of rows to insert (split in chunks)
    :param columns: column labels of the dataframe
    :param dataframe_max_size: maximum size passed to ``assign_dataframe``
    :param chunk_size: number of rows inserted per ``add_rows`` call
    """
    dataframe = DataFrame(columns=columns)
    assign_dataframe(dataframe, dataframe_max_size, columns, queue_name)

    @managing(queue_name=queue_name)
    @adding(queue_name=queue_name)
    def add_rows(rows_nb: int) -> List[Tuple[float, Dict]]:
        # Returns the (index, values) pairs of the rows just inserted; the
        # index is a random float, one random value is drawn per column.
        result = list()
        for _ in range(rows_nb):
            index = numpy.random.rand(1)[0]
            columns_dict = {
                column: numpy.random.rand(1)[0] for column in columns
            }
            dataframe.at[index] = Series(data=columns_dict)
            result.append((index, columns_dict))
        return result

    assert dataframe.empty
    start = time.perf_counter()
    for i in range(int(execution_nb / chunk_size)):
        add_rows(chunk_size)
        # The previous form `assert a == b if cond else c` parsed as
        # `assert (a == b) if cond else c`, so once the cap was exceeded it
        # asserted the (truthy) max size and verified nothing.
        expected_size = min(chunk_size * (i + 1), dataframe_max_size)
        assert len(dataframe) == expected_size
    end = time.perf_counter()
    assert len(dataframe) == dataframe_max_size

    print("\n{} managing execution time : {} s".format(queue_name,
                                                       end - start))
Exemplo n.º 2
0
def test_parallel_2():
    """Feed one dataframe concurrently through two queues, 'TEST_3' and 'TEST_4'.

    The same dataframe is assigned to both queues with different maximum
    sizes (1000 and 500) and different selected columns. The test checks
    that both queues share one lock, distinct from the default queue's lock,
    then runs one adding thread per queue.
    """
    columns_for_a = ["A", "B"]
    columns_for_b = ["C", "D"]

    @synchronized(queue_name='TEST_3')
    @managing(queue_name='TEST_3')
    @adding(queue_items_creation_function=create_queue_item,
            other_args={"selected_columns": columns_for_a},
            queue_name='TEST_3')
    def parallel_add_row_a(dataframe: DataFrame, index: str,
                           columns_dict: dict) -> Tuple[str, Dict]:
        return add_row(dataframe, index, columns_dict)

    @synchronized(queue_name='TEST_4')
    @managing(queue_name='TEST_4')
    @adding(queue_items_creation_function=create_queue_item,
            other_args={"selected_columns": columns_for_b},
            queue_name='TEST_4')
    def parallel_add_row_b(dataframe: DataFrame, index: str,
                           columns_dict: dict) -> Tuple[str, Dict]:
        return add_row(dataframe, index, columns_dict)

    def thread_adding(operation_number: int, dataframe: DataFrame,
                      adding_function: Callable):
        # Each call inserts one row with a random label and random values.
        for _ in range(operation_number):
            adding_function(
                dataframe,
                str(uuid4()),
                {label: str(uuid4()) for label in ('A', 'B', 'C', 'D')})

    dataframe = DataFrame(columns=['A', 'B', 'C', 'D'])
    assign_dataframe(dataframe, 1000, columns_for_a, 'TEST_3')
    assign_dataframe(dataframe, 500, columns_for_b, 'TEST_4')

    assert dataframe.empty

    # noinspection PyProtectedMember
    handler = QueuesHandler._QueuesHandler__instance

    def lock_id(name):
        return id(handler.get_assigned_lock(name))

    # Neither queue may use the default queue's lock ...
    assert lock_id('TEST_3') != lock_id(QueuesHandler().default_queue_name)
    assert lock_id('TEST_4') != lock_id(QueuesHandler().default_queue_name)
    # ... but both queues must share the very same lock.
    assert lock_id('TEST_3') == lock_id('TEST_4')

    with ThreadPoolExecutor(max_workers=2) as pool:
        pending = [
            pool.submit(thread_adding, 4000, dataframe, adder)
            for adder in (parallel_add_row_a, parallel_add_row_b)
        ]
        for future in pending:
            future.result()

    # We can't predict if dataframe's size will be 500 or 1000
    assert len(dataframe) in (500, 1000)
Exemplo n.º 3
0
def test_parallel_1(queue_name):
    """Run concurrent row additions and row changes against a single queue.

    Two jobs add 4000 rows each and a third changes 1000 randomly chosen
    rows, all on a two-worker pool; the dataframe, assigned with a maximum
    size of 1000, must end with exactly 1000 rows.

    :param queue_name: queue to use; ``None`` selects the default queue name
    """
    tracked_columns = ["A", "C"]
    if queue_name is None:
        queue_name = QueuesHandler().default_queue_name

    @synchronized(queue_name=queue_name)
    @managing(queue_name=queue_name)
    @adding(queue_items_creation_function=create_queue_item,
            other_args={"selected_columns": tracked_columns},
            queue_name=queue_name)
    def parallel_add_row(dataframe: DataFrame, index: str,
                         columns_dict: dict) -> Tuple[str, Dict]:
        return add_row(dataframe, index, columns_dict)

    @synchronized(queue_name=queue_name)
    @adding(queue_items_creation_function=create_queue_item,
            other_args={"selected_columns": tracked_columns},
            queue_name=queue_name)
    def parallel_change_row_value(dataframe: DataFrame, index: str,
                                  new_columns_dict: dict) -> Tuple[str, Dict]:
        return change_row_value(dataframe, index, new_columns_dict)

    def thread_adding(operation_number: int, dataframe: DataFrame):
        for _ in range(operation_number):
            parallel_add_row(
                dataframe,
                str(uuid4()),
                {label: str(uuid4()) for label in ('A', 'B', 'C', 'D')})

    def thread_change(operation_number: int, dataframe: DataFrame):
        for _ in range(operation_number):
            # The label is picked outside the synchronized call.
            # NOTE(review): it could be stale by the time the change runs
            # if another thread evicts that row first — confirm this is
            # acceptable for the test.
            labels = dataframe.index.values
            position = randint(0, len(dataframe) - 1)
            parallel_change_row_value(
                dataframe,
                labels[position],
                {label: str(uuid4()) for label in ('A', 'B', 'C', 'D')})

    dataframe = DataFrame(columns=['A', 'B', 'C', 'D'])
    assign_dataframe(dataframe, 1000, tracked_columns, queue_name)

    assert dataframe.empty

    with ThreadPoolExecutor(max_workers=2) as pool:
        # With two workers the change job only starts once an adding job
        # has finished.
        pending = [
            pool.submit(thread_adding, 4000, dataframe),
            pool.submit(thread_adding, 4000, dataframe),
            pool.submit(thread_change, 1000, dataframe),
        ]
        for future in pending:
            future.result()

    assert len(dataframe) == 1000
Exemplo n.º 4
0
def test_assign_none(queue_name):
    """Check that re-assigning a queue with ``None`` replaces its stored data.

    A dataframe is first assigned to the queue, then ``None`` is assigned
    with a maximum size of 1 and no selected columns. The queue data must
    afterwards hold that size, no dataframe and an empty deque.

    :param queue_name: queue to use; ``None`` selects the default queue name
    """
    if queue_name is None:
        real_queue_name = QueuesHandler().default_queue_name
    else:
        real_queue_name = queue_name

    assign_dataframe(
        DataFrame(columns=['A', 'B', 'C', 'D']),
        2,
        ['B'],
        queue_name=real_queue_name,
    )
    assign_dataframe(None, 1, [], queue_name=real_queue_name)

    stored = QueuesHandler()[real_queue_name]
    assert stored[QueueHandlerItem.MAX_SIZE] == 1
    assert stored[QueueHandlerItem.DATAFRAME] is None
    assert stored[QueueHandlerItem.QUEUE] == deque()
Exemplo n.º 5
0
def test_massive_managing(queue_name, rows_nb, columns, dataframe_max_size):
    """Time a single ``managing`` pass over a pre-filled dataframe.

    A dataframe of ``rows_nb`` random rows is assigned to the queue, then a
    no-op function decorated with ``managing`` is called once; afterwards
    the dataframe must contain exactly ``dataframe_max_size`` rows.

    :param queue_name: name of the queue the dataframe is assigned to
    :param rows_nb: number of rows the dataframe starts with
    :param columns: column labels of the dataframe
    :param dataframe_max_size: maximum size passed to ``assign_dataframe``
    """
    @managing(queue_name=queue_name)
    def manage() -> None:
        # The body is intentionally empty: only the decorator's work is
        # measured. It returns normally, so `None` (not `NoReturn`) is the
        # correct annotation.
        pass

    # `rand` and `arange` already return fresh ndarrays; no extra copy needed.
    data = numpy.random.rand(rows_nb, len(columns))
    index = numpy.arange(rows_nb)
    dataframe = DataFrame(data, index=index, columns=columns)

    assert len(dataframe) == rows_nb
    assign_dataframe(dataframe, dataframe_max_size, columns, queue_name)
    start = time.perf_counter()
    manage()
    end = time.perf_counter()
    assert len(dataframe) == dataframe_max_size

    print("\n{} managing execution time : {} s".format(queue_name,
                                                       end - start))
Exemplo n.º 6
0
def test_assign_dataframe(queue_name, dataframe, max_size, selected_columns):
    """Check the queue data stored by ``assign_dataframe``.

    After the assignment the queue data must hold the given maximum size,
    the very same dataframe object, and one queue item per dataframe row
    whose values match the dataframe cells of the selected columns.

    :param queue_name: queue to use; ``None`` selects the default queue name
    :param dataframe: dataframe to assign
    :param max_size: maximum size to assign
    :param selected_columns: columns expected in every queue item
    """
    if queue_name is None:
        real_queue_name = QueuesHandler().default_queue_name
    else:
        real_queue_name = queue_name

    assign_dataframe(dataframe, max_size, selected_columns,
                     queue_name=real_queue_name)
    stored = QueuesHandler()[real_queue_name]

    assert stored[QueueHandlerItem.MAX_SIZE] == max_size
    # Identity, not equality: the handler must keep the caller's object.
    assert stored[QueueHandlerItem.DATAFRAME] is dataframe

    queue = stored[QueueHandlerItem.QUEUE]
    assert len(queue) == len(dataframe)
    for queue_item in queue:
        # Each item is indexed as (row label, {column: value}).
        row_label, row_values = queue_item[0], queue_item[1]

        assert len(row_values) == len(selected_columns)

        for column_name, column_value in row_values.items():
            assert dataframe[column_name][row_label] == column_value
Exemplo n.º 7
0
def test_list_queue_names():
    """Check that ``list_queue_names`` reports every newly assigned queue.

    Two dataframes are assigned to freshly generated queue names, one after
    the other; after each assignment the list must have grown by one and
    contain every name registered so far.
    """
    dataframe_a = DataFrame(
        array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]),
        index=['a1', 'a2', 'a3'],
        columns=['A', 'B', 'C', 'D'])
    dataframe_b = DataFrame(
        array([[13, 14, 15], [17, 18, 19]]),
        index=['a4', 'a5'],
        columns=['E', 'F', 'G'])

    initial_size = len(list_queue_names())

    # Listing twice in a row must not change the result.
    assert len(list_queue_names()) == initial_size

    # NOTE(review): the selected column is passed as a bare string here while
    # other tests pass a list — confirm assign_dataframe accepts both.
    assignments = ((dataframe_b, 2, 'E'), (dataframe_a, 10, 'D'))
    registered = []
    for frame, max_size, selected_column in assignments:
        new_name = str(uuid4())
        assign_dataframe(frame, max_size, selected_column,
                         queue_name=new_name)
        registered.append(new_name)

        queue_names = list_queue_names()
        assert len(queue_names) == initial_size + len(registered)
        assert all(name in queue_names for name in registered)
Exemplo n.º 8
0
def test_sequential_1(queue_name, columns, selected_columns):
    """Drive a size-2 queue through adds, removals and changes step by step.

    The first part starts from an empty dataframe and checks the dataframe
    length and the exact queue content after every operation. The second
    part repeats a shorter scenario on a pre-filled dataframe for each
    ``QueueBehaviour`` and checks the behaviour-specific outcome.

    :param queue_name: queue to use; ``None`` selects the default queue name
    :param columns: column labels of the dataframes
    :param selected_columns: columns expected in every queue item
    """
    if queue_name is None:
        queue_name = QueuesHandler().default_queue_name

    @managing(queue_name=queue_name)
    @adding(queue_items_creation_function=create_queue_item,
            other_args={"selected_columns": selected_columns},
            queue_name=queue_name)
    def sequential_add_row(dataframe: DataFrame, index: str,
                           columns_dict: dict) -> Tuple[str, Dict]:
        return add_row(dataframe, index, columns_dict)

    @adding(queue_items_creation_function=create_queue_item,
            other_args={"selected_columns": selected_columns},
            queue_name=queue_name)
    def sequential_change_row_value(
            dataframe: DataFrame, index: str,
            new_columns_dict: dict) -> Tuple[str, Dict]:
        return change_row_value(dataframe, index, new_columns_dict)

    def stored_queue():
        # Current deque held by the handler for the queue under test.
        # noinspection PyProtectedMember
        return QueuesHandler()._QueuesHandler__queues[queue_name]

    def random_row():
        # One random string value per dataframe column.
        return {column: str(uuid4()) for column in columns}

    def expected_item(label, row):
        # Queue item expected for a row: its label plus the selected columns.
        return (label, {name: row[name] for name in selected_columns})

    dataframe = DataFrame(columns=columns)
    assign_dataframe(dataframe, 2, selected_columns, queue_name)

    assert dataframe.empty
    assert stored_queue() == deque()

    # Rows "1" and "2" fill the dataframe up to its maximum size.
    row_1 = random_row()
    sequential_add_row(dataframe, "1", row_1)
    assert len(dataframe) == 1
    item_1 = expected_item("1", row_1)
    assert stored_queue() == deque([item_1])

    row_2 = random_row()
    sequential_add_row(dataframe, "2", row_2)
    assert len(dataframe) == 2
    item_2 = expected_item("2", row_2)
    assert stored_queue() == deque([item_1, item_2])

    # Row "3" overflows: the oldest item is expected to leave the queue.
    row_3 = random_row()
    sequential_add_row(dataframe, "3", row_3)
    assert len(dataframe) == 2
    item_3 = expected_item("3", row_3)
    assert stored_queue() == deque([item_2, item_3])

    # Rows removed directly are expected to stay in the queue.
    remove_row(dataframe, "3")
    assert len(dataframe) == 1
    assert stored_queue() == deque([item_2, item_3])

    remove_row(dataframe, "2")
    assert dataframe.empty
    assert stored_queue() == deque([item_2, item_3])

    # While the dataframe is not over its size, the stale items remain.
    row_4 = random_row()
    sequential_add_row(dataframe, "4", row_4)
    row_5 = random_row()
    sequential_add_row(dataframe, "5", row_5)
    assert len(dataframe) == 2
    item_4 = expected_item("4", row_4)
    item_5 = expected_item("5", row_5)
    assert stored_queue() == deque([item_2, item_3, item_4, item_5])

    row_6 = random_row()
    sequential_add_row(dataframe, "6", row_6)
    assert len(dataframe) == 2
    item_6 = expected_item("6", row_6)
    assert stored_queue() == deque([item_5, item_6])

    # Changing a row appends a new item for the same label.
    changed_row_5 = random_row()
    sequential_change_row_value(dataframe, "5", changed_row_5)
    assert len(dataframe) == 2
    changed_item_5 = expected_item("5", changed_row_5)
    assert stored_queue() == deque([item_5, item_6, changed_item_5])

    row_7 = random_row()
    sequential_add_row(dataframe, "7", row_7)
    assert len(dataframe) == 2
    item_7 = expected_item("7", row_7)
    assert stored_queue() == deque([changed_item_5, item_7])

    ################################################################################################

    column_names = list(columns)

    def random_values():
        # One random string per column, in column order.
        return [str(uuid4()) for _ in range(len(columns))]

    def as_row(values):
        # Map positional values onto their column names.
        return dict(zip(column_names, values))

    def selected(values):
        # Selected-column view of positional values.
        return {
            name: values[column_names.index(name)]
            for name in selected_columns
        }

    for behaviour in [QueueBehaviour.ALL_ITEMS, QueueBehaviour.LAST_ITEM]:
        values_row_1 = random_values()
        values_row_2 = random_values()
        dataframe_2 = DataFrame(data=[values_row_1, values_row_2],
                                index=['1', '2'],
                                columns=columns)
        assign_dataframe(dataframe_2,
                         2,
                         selected_columns,
                         queue_name,
                         queue_behaviour=behaviour)

        original_1 = ('1', selected(values_row_1))
        original_2 = ('2', selected(values_row_2))
        assert len(dataframe_2) == 2
        assert stored_queue() == deque([original_1, original_2])

        # First change of row '1'.
        new_values_row_1 = random_values()
        sequential_change_row_value(dataframe_2, '1',
                                    as_row(new_values_row_1))
        changed_1 = ('1', selected(new_values_row_1))
        assert len(dataframe_2) == 2
        assert stored_queue() == deque([original_1, original_2, changed_1])

        # Second change of row '1', back to its original values.
        sequential_change_row_value(dataframe_2, '1', as_row(values_row_1))
        assert len(dataframe_2) == 2
        assert stored_queue() == deque(
            [original_1, original_2, changed_1, original_1])

        # Adding row '3' overflows the dataframe; the outcome depends on
        # the queue behaviour.
        values_row_3 = random_values()
        sequential_add_row(dataframe_2, '3', as_row(values_row_3))
        added_3 = ('3', selected(values_row_3))
        assert len(dataframe_2) == 2
        if behaviour == QueueBehaviour.ALL_ITEMS:
            assert stored_queue() == deque(
                [original_2, changed_1, original_1, added_3])
            assert all(label in dataframe_2.index for label in ['2', '3'])
        elif behaviour == QueueBehaviour.LAST_ITEM:
            assert stored_queue() == deque(
                [changed_1, original_1, added_3])
            assert all(label in dataframe_2.index for label in ['1', '3'])
        else:
            assert False, "Missing test for the behaviour {}".format(behaviour)
Exemplo n.º 9
0
def test_sequential_2(queue_name, columns, selected_columns):
    """Add rows in batches to a size-5 queue and check it after each batch.

    Rows are inserted through a local function that adds several rows per
    call; after every call the dataframe length and the exact queue content
    are verified.

    :param queue_name: name of the queue the dataframe is assigned to
    :param columns: column labels of the dataframe
    :param selected_columns: columns expected in every queue item
    """
    @managing(queue_name=queue_name)
    @adding(queue_items_creation_function=create_queue_items,
            other_args={"selected_columns": selected_columns},
            queue_name=queue_name)
    def sequential_add_rows(
            dataframe: DataFrame, indexes: List[str],
            columns_dicts: List[dict]) -> List[Tuple[str, Dict]]:
        assert len(indexes) == len(columns_dicts)
        return [
            add_row(dataframe, index, columns_dict)
            for index, columns_dict in zip(indexes, columns_dicts)
        ]

    def stored_queue():
        # Current deque held by the handler for the queue under test.
        # noinspection PyProtectedMember
        return QueuesHandler()._QueuesHandler__queues[queue_name]

    def random_row():
        # One random string value per dataframe column.
        return {column: str(uuid4()) for column in columns}

    def expected_item(label, row):
        # Queue item expected for a row: its label plus the selected columns.
        return (label, {name: row[name] for name in selected_columns})

    dataframe = DataFrame(columns=columns)
    assign_dataframe(dataframe, 5, selected_columns, queue_name)
    assert dataframe.empty
    assert stored_queue() == deque()

    # Batch of one row.
    row_1 = random_row()
    sequential_add_rows(dataframe, ["1"], [row_1])
    assert len(dataframe) == 1
    item_1 = expected_item("1", row_1)
    assert stored_queue() == deque([item_1])

    # Batch of three rows, still below the maximum size.
    row_2 = random_row()
    row_3 = random_row()
    row_4 = random_row()
    sequential_add_rows(dataframe, ["2", "3", "4"], [row_2, row_3, row_4])
    assert len(dataframe) == 4
    item_2 = expected_item("2", row_2)
    item_3 = expected_item("3", row_3)
    item_4 = expected_item("4", row_4)
    assert stored_queue() == deque([item_1, item_2, item_3, item_4])

    # Batch of three rows overflowing the maximum size: the two oldest
    # items are expected to leave the queue.
    row_5 = random_row()
    row_6 = random_row()
    row_7 = random_row()
    sequential_add_rows(dataframe, ["5", "6", "7"], [row_5, row_6, row_7])
    assert len(dataframe) == 5
    item_5 = expected_item("5", row_5)
    item_6 = expected_item("6", row_6)
    item_7 = expected_item("7", row_7)
    assert stored_queue() == deque([item_3, item_4, item_5, item_6, item_7])