Пример #1
0
def test_dispatcher_worker_retry(session_toy):
    """Check that a submission whose worker is in 'retry' ends up as 'new'.

    One launched worker is pulled from the processing queue, flagged as
    ``'retry'`` and put back. Once the processing queue has been drained
    through ``collect_result``, the matching submission must be listed
    among the ``'new'`` submissions of the ``iris_test`` event.
    """
    config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())
    dispatcher = Dispatcher(config=config,
                            event_config=event_config,
                            worker=CondaEnvWorker,
                            n_workers=10,
                            hunger_policy='exit')

    dispatcher.fetch_from_db(session_toy)
    dispatcher.launch_workers(session_toy)

    # A queue-style ``get()`` blocks by default while the queue is empty:
    # fail right away instead of hanging the test run if nothing was launched
    assert not dispatcher._processing_worker_queue.empty()
    # Get one worker and set status to 'retry'
    worker, (submission_id, submission_name) = \
        dispatcher._processing_worker_queue.get()
    worker.status = 'retry'
    assert worker.status == 'retry'
    # Add back to queue so that the dispatcher collects this worker as well
    dispatcher._processing_worker_queue.put_nowait(
        (worker, (submission_id, submission_name)))

    while not dispatcher._processing_worker_queue.empty():
        dispatcher.collect_result(session_toy)
    submissions = get_submissions(session_toy, 'iris_test', 'new')
    # the second field of each returned entry is compared to the name
    new_submission_names = [sub[1] for sub in submissions]
    assert submission_name in new_submission_names
Пример #2
0
def test_unit_test_dispatcher(session_toy):
    """Walk a dispatcher through fetching, training and the database update.

    The queues and the per-state submission counts of the ``iris_test``
    event are checked after each step, then the leaderboards of the event
    are inspected once the results have been written to the database.
    """
    # the number of workers is meant to exceed the number of submissions
    dispatcher = Dispatcher(
        config=read_config(database_config_template()),
        event_config=read_config(ramp_config_template()),
        worker=CondaEnvWorker,
        n_workers=100,
        hunger_policy='exit',
    )

    # a freshly created dispatcher has nothing in any of its queues
    assert dispatcher._awaiting_worker_queue.empty()
    assert dispatcher._processing_worker_queue.empty()
    assert dispatcher._processed_submission_queue.empty()

    # fetching must queue every submission...
    new_submissions = get_submissions(session_toy, 'iris_test', 'new')
    dispatcher.fetch_from_db(session_toy)
    # ...except the starting kit of each user, hence the offset of 2
    n_awaiting = dispatcher._awaiting_worker_queue.qsize()
    assert n_awaiting == len(new_submissions) - 2
    sent_to_training = get_submissions(
        session_toy, 'iris_test', 'sent_to_training'
    )
    assert len(sent_to_training) == 6

    # train, then collect until no worker is left in the processing queue
    dispatcher.launch_workers(session_toy)
    while not dispatcher._processing_worker_queue.empty():
        dispatcher.collect_result(session_toy)

    # expected number of submissions per state once training is over
    expected_counts = (('new', 2), ('training_error', 2), ('tested', 4))
    for state, expected in expected_counts:
        in_state = get_submissions(session_toy, 'iris_test', state)
        assert len(in_state) == expected

    dispatcher.update_database_results(session_toy)
    assert dispatcher._processed_submission_queue.empty()
    event = get_event(session_toy, 'iris_test')
    # these leaderboards must be filled in...
    for leaderboard in ('private_leaderboard_html',
                        'public_leaderboard_html_with_links',
                        'public_leaderboard_html_no_links',
                        'failed_leaderboard_html'):
        assert getattr(event, leaderboard)
    # ...the one for new submissions must stay unset...
    assert event.new_leaderboard_html is None
    # ...and the competition leaderboards must be filled in too
    for leaderboard in ('public_competition_leaderboard_html',
                        'private_competition_leaderboard_html'):
        assert getattr(event, leaderboard)
Пример #3
0
def test_dispatcher_aws_not_launching(session_toy_aws, caplog):
    """Check the dispatcher when the AWS workers cannot be launched.

    With the test AWS configuration the instance is expected to fail to
    launch because of an authentication error, after which the worker
    should tear down. The test verifies that the failure is logged, that
    no training is logged, that no worker is left in the processing queue
    and that the number of ``'new'`` submissions is unchanged.
    """
    dispatcher = Dispatcher(
        config=read_config(database_config_template()),
        event_config=read_config(ramp_aws_config_template()),
        worker=AWSWorker,
        n_workers=10,
        hunger_policy='exit',
    )
    dispatcher.fetch_from_db(session_toy_aws)
    new_before = get_submissions(session_toy_aws, 'iris_aws_test', 'new')

    dispatcher.launch_workers(session_toy_aws)
    captured_log = caplog.text
    assert 'AuthFailure' in captured_log
    # no training may have been started
    assert 'training' not in captured_log
    # no worker is left running
    assert dispatcher._processing_worker_queue.qsize() == 0
    new_after = get_submissions(session_toy_aws, 'iris_aws_test', 'new')
    # the failed launch must not change how many submissions are 'new'
    assert len(new_before) == len(new_after)
Пример #4
0
def test_info_on_training_error(test_launch_ec2_instances, upload_submission,
                                launch_train, is_spot_terminated,
                                training_finished, training_successful,
                                get_log_content, check_instance_status,
                                download_log, session_toy_aws, caplog):
    """Check that a training error message reaches the stored submission.

    Every AWS-facing argument is used as a mock here (only its
    ``return_value`` is configured), so that setting up the instance and
    loading the submission look successful. The training is then reported
    as finished but unsuccessful, and the mocked log content must show up
    in the ``error_msg`` of a submission in the ``'training_error'`` state.
    """
    # mock dummy AWS instance
    class DummyInstance:
        id = 1

    # everything should be mocked as correct output from the AWS instances
    # while setting up the instance and loading the submission
    test_launch_ec2_instances.return_value = (DummyInstance(), ), 0
    upload_submission.return_value = 0
    launch_train.return_value = 0
    is_spot_terminated.return_value = 0
    training_finished.return_value = False
    download_log.return_value = 0

    config = read_config(database_config_template())
    event_config = read_config(ramp_aws_config_template())

    dispatcher = Dispatcher(config=config,
                            event_config=event_config,
                            worker=AWSWorker,
                            n_workers=10,
                            hunger_policy='exit')
    dispatcher.fetch_from_db(session_toy_aws)
    dispatcher.launch_workers(session_toy_aws)
    num_running_workers = dispatcher._processing_worker_queue.qsize()
    submissions = get_submissions(session_toy_aws, 'iris_aws_test', 'training')
    # the first field of each returned entry is used as the submission id
    ids = [submission[0] for submission in submissions]
    assert len(submissions) > 1
    # one running worker per submission in the 'training' state
    assert num_running_workers == len(ids)

    dispatcher.time_between_collection = 0
    training_successful.return_value = False

    # now we will end the submission with training error
    training_finished.return_value = True
    training_error_msg = 'Python error here'
    get_log_content.return_value = training_error_msg
    check_instance_status.return_value = 'finished'

    dispatcher.collect_result(session_toy_aws)

    # the workers which we were using should have been torn down
    num_running_workers = dispatcher._processing_worker_queue.qsize()

    assert num_running_workers == 0

    submissions = get_submissions(session_toy_aws, 'iris_aws_test',
                                  'training_error')
    assert len(submissions) == len(ids)

    # make sure that the Python error from the solution is passed on to the
    # submission stored in the database
    submission = get_submission_by_id(session_toy_aws, submissions[0][0])
    assert training_error_msg in submission.error_msg