Пример #1
0
    def test_populate_cache(self, run_mock, flow_mock, dataset_mock,
                            task_mock):
        """Check that ``populate_cache`` calls each mock once per given ID.

        Every mock must be called exactly twice, and the positional
        arguments of those calls must match the requested IDs in order.
        """
        openml.populate_cache(
            task_ids=[1, 2],
            dataset_ids=[3, 4],
            flow_ids=[5, 6],
            run_ids=[7, 8]
        )

        # Each entry pairs a mock with the positional-argument tuples it is
        # expected to have received, in call order.
        expectations = [
            (run_mock, [(7, ), (8, )]),
            (flow_mock, [(5, ), (6, )]),
            (dataset_mock, [(3, ), (4, )]),
            (task_mock, [(1, ), (2, )]),
        ]
        for mock, expected_calls in expectations:
            self.assertEqual(mock.call_count, 2)
            recorded_calls = mock.call_args_list
            for call, expected in six.moves.zip(recorded_calls,
                                                expected_calls):
                # call[0] is the tuple of positional arguments.
                self.assertEqual(call[0], expected)
Пример #2
0
def callattr_ahead_of_alltests(request):
    """Prefetch the OpenML tasks and datasets that the tests rely on.

    Meant to run at the start of the pytest session. The task cache is
    populated first, then the dataset behind every task is fetched, so a
    missing resource makes the run fail immediately instead of waiting for
    an OpenML fetch timeout later on.

    The ``request`` argument is accepted but not used here.
    """
    required_task_ids = [
        146818,  # Australian
        2295,  # cholesterol
        2075,  # abalone
        2071,  # adult
        3,  # kr-vs-kp
        9981,  # cnae-9
        146821,  # car
        146822,  # Segment
        2,  # anneal
        53,  # vehicle
        5136,  # tecator
        4871,  # sensory
        4857,  # boston
        3916,  # kc1
    ]

    # Fill the cache up front: a failure here surfaces right away rather
    # than as an OpenML fetch timeout in the middle of the tests.
    openml.populate_cache(task_ids=required_task_ids)

    # Fetch the dataset bunch that belongs to each task as well.
    for task_id in required_task_ids:
        dataset_id = openml.tasks.get_task(task_id).dataset_id
        fetch_openml(data_id=dataset_id, return_X_y=True)
Пример #3
0
    def test_populate_cache(self, run_mock, flow_mock, dataset_mock, task_mock):
        """Verify ``populate_cache`` calls every mock twice with the given IDs.

        The positional arguments recorded on each mock are compared, in call
        order, against the IDs passed for that resource type.
        """
        requested = {
            'task_ids': [1, 2],
            'dataset_ids': [3, 4],
            'flow_ids': [5, 6],
            'run_ids': [7, 8],
        }
        openml.populate_cache(**requested)

        # Checked in the same order as before: runs, flows, datasets, tasks.
        checks = (
            (run_mock, [(7,), (8,)]),
            (flow_mock, [(5,), (6,)]),
            (dataset_mock, [(3,), (4,)]),
            (task_mock, [(1,), (2,)]),
        )
        for patched, wanted in checks:
            self.assertEqual(patched.call_count, 2)
            for recorded, fixture in six.moves.zip(patched.call_args_list, wanted):
                # recorded[0] holds the positional arguments of the call.
                self.assertEqual(recorded[0], fixture)
            
Пример #4
0
import openml

# Collect the IDs of all tasks tagged 'study_14' and pre-populate the local
# OpenML cache with them.
tasks = list(openml.tasks.list_tasks(tag='study_14').keys())
openml.populate_cache(task_ids=tasks)
# Interpolate the count with `%`. Passing it as a second print() argument
# would output the literal "%d" placeholder followed by the number.
print('Using %d tasks' % len(tasks))