Example No. 1
def test_deploy_ramp_event(session_scope_function):
    database_config = read_config(database_config_template())
    event_config_filename = ramp_config_template()
    event_config = read_config(event_config_filename)
    ramp_config = generate_ramp_config(event_config)
    deploy_ramp_event(database_config_template(), ramp_config_template())

    # simulate that we add users who sign up for the event and submit
    # the starting kit
    with session_scope(database_config['sqlalchemy']) as session:
        add_users(session)
        sign_up_team(session, ramp_config['event_name'], 'test_user')
        submit_starting_kits(session, ramp_config['event_name'], 'test_user',
                             ramp_config['ramp_kit_submissions_dir'])

    # run the dispatcher on the event present in the database
    dispatcher = Dispatcher(config=database_config,
                            event_config=event_config,
                            worker=CondaEnvWorker,
                            n_workers=-1,
                            hunger_policy='exit')
    dispatcher.launch()

    # the iris kit contains a submission which should fail for a user
    with session_scope(database_config['sqlalchemy']) as session:
        submission = get_submissions(session,
                                     event_config['ramp']['event_name'],
                                     'training_error')
        assert len(submission) == 1
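Across these examples, `get_submissions(session, event_name, state)` filters submissions by a state string, with `None` returning every state; Example No. 16 below shows that an unrecognized state raises. A minimal sketch of that guard, assuming only the state names observed in these tests (not the authoritative list from ramp-database):

KNOWN_STATES = {'new', 'sent_to_training', 'training', 'tested',
                'training_error', 'checking_error'}

def check_state(state):
    # mimic the guard behind get_submissions (see Example No. 16); the
    # state names above are only those exercised in these tests
    if state is not None and state not in KNOWN_STATES:
        raise ValueError('Unrecognized state : {}'.format(state))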
Example No. 2
def test_aws_dispatcher(session_toy):  # noqa
    # copy of test_integration_dispatcher but with AWS
    if not os.path.isfile(os.path.join(HERE, 'config.yml')):
        pytest.skip("Only for local tests for now")

    config = read_config(database_config_template())
    event_config = ramp_config_template()
    event_config = read_config(event_config)

    # patch the event_config to match local config.yml for AWS
    aws_event_config = read_config(os.path.join(HERE, 'config.yml'))
    event_config['worker'] = aws_event_config['worker']

    dispatcher = Dispatcher(config=config,
                            event_config=event_config,
                            worker=AWSWorker,
                            n_workers=-1,
                            hunger_policy='exit')
    dispatcher.launch()

    # the iris kit contains a submission which should fail for each user
    submission = get_submissions(session_toy,
                                 event_config['ramp']['event_name'],
                                 'training_error')
    assert len(submission) == 2
Example No. 3
def fetch_from_db(self, session):
    """Fetch the submissions from the database and create the workers."""
    submissions = get_submissions(session,
                                  self._ramp_config['event_name'],
                                  state='new')
    if not submissions:
        return
    for submission_id, submission_name, _ in submissions:
        # do not train the sandbox submission
        submission = get_submission_by_id(session, submission_id)
        if not submission.is_not_sandbox:
            continue
        # create the worker
        worker = self.worker(self._worker_config, submission_name)
        set_submission_state(session, submission_id, 'sent_to_training')
        update_user_leaderboards(
            session,
            self._ramp_config['event_name'],
            submission.team.name,
            new_only=True,
        )
        self._awaiting_worker_queue.put_nowait(
            (worker, (submission_id, submission_name)))
        logger.info('Submission {} added to the queue of submissions to be '
                    'processed'.format(submission_name))
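`fetch_from_db` pairs each new submission with a freshly constructed worker and enqueues the pair without blocking. A self-contained sketch of that producer step, assuming only the `(config, submission_name)` constructor signature visible above (`EchoWorker` is a hypothetical stand-in for `CondaEnvWorker` or `AWSWorker`):

from queue import Queue

class EchoWorker:
    """Hypothetical stand-in for the worker classes used in these tests."""
    def __init__(self, config, submission_name):
        self.config = config
        self.submission_name = submission_name

awaiting_worker_queue = Queue()
for submission_id, submission_name in [(1, 'starting_kit'), (2, 'error')]:
    worker = EchoWorker(config={}, submission_name=submission_name)
    awaiting_worker_queue.put_nowait((worker, (submission_id, submission_name)))
assert awaiting_worker_queue.qsize() == 2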
Example No. 4
def test_dispatcher_worker_retry(session_toy):
    config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())
    dispatcher = Dispatcher(config=config,
                            event_config=event_config,
                            worker=CondaEnvWorker,
                            n_workers=10,
                            hunger_policy='exit')

    dispatcher.fetch_from_db(session_toy)
    dispatcher.launch_workers(session_toy)

    # Get one worker and set status to 'retry'
    worker, (submission_id, submission_name) = \
        dispatcher._processing_worker_queue.get()
    setattr(worker, 'status', 'retry')
    assert worker.status == 'retry'
    # Add back to queue
    dispatcher._processing_worker_queue.put_nowait(
        (worker, (submission_id, submission_name)))

    while not dispatcher._processing_worker_queue.empty():
        dispatcher.collect_result(session_toy)
    submissions = get_submissions(session_toy, 'iris_test', 'new')
    assert submission_name in [sub[1] for sub in submissions]
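The retry test above takes an in-flight (worker, submission) pair off the processing queue, flips the worker status, and puts it back so that `collect_result` re-queues the submission as 'new'. The round-trip itself relies only on standard `queue.Queue` semantics, sketched here with a hypothetical worker object:

from queue import Queue
from types import SimpleNamespace

processing_worker_queue = Queue()
worker = SimpleNamespace(status='running')  # hypothetical worker object
processing_worker_queue.put_nowait((worker, (1, 'starting_kit')))

# the same get / mutate / put round-trip as in the test above
worker, (submission_id, submission_name) = processing_worker_queue.get()
worker.status = 'retry'
processing_worker_queue.put_nowait((worker, (submission_id, submission_name)))
assert processing_worker_queue.qsize() == 1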
Example No. 5
def test_error_handling_worker_setup_error(session_toy, caplog):
    # make sure the error on the worker.setup is dealt with correctly
    # set mock worker
    class Worker_mock():
        def __init__(self, *args, **kwargs):
            self.state = None

        def setup(self):
            raise Exception('Test error')

        def teardown(self):
            pass

    config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())

    worker = Worker_mock()
    dispatcher = Dispatcher(config=config,
                            event_config=event_config,
                            worker=Worker_mock,
                            n_workers=-1,
                            hunger_policy='exit')

    dispatcher.launch()
    submissions = get_submissions(session_toy,
                                  event_config['ramp']['event_name'],
                                  'checking_error')
    assert len(submissions) == 6
    worker.status = 'error'
    assert 'Test error' in caplog.text
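The test relies on duck typing: any class exposing `setup()` and `teardown()` can stand in for a worker, and the dispatcher is expected to catch the setup error and log it. A minimal, self-contained sketch of the `caplog` assertion pattern used above (the logging here is done by hand, not by the real dispatcher):

import logging
import pytest  # noqa: F401  (the caplog fixture comes from pytest)

logger = logging.getLogger(__name__)

def failing_setup():
    """Stand-in for Worker_mock.setup above."""
    raise Exception('Test error')

def test_setup_error_is_logged(caplog):
    # catch and log the setup error by hand, then assert on caplog.text
    # exactly as the example above does
    try:
        failing_setup()
    except Exception as e:
        logger.error('worker setup failed: %s', e)
    assert 'Test error' in caplog.text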
Example No. 6
def test_add_submission_create_new_submission(base_db):
    # check that we can make a new submission to the database
    # it will require to have already a team and an event
    session = base_db
    config = ramp_config_template()
    event_name, username = _setup_sign_up(session)
    ramp_config = generate_ramp_config(read_config(config))

    submission_name = 'random_forest_10_10'
    path_submission = os.path.join(
        os.path.dirname(ramp_config['ramp_sandbox_dir']), submission_name)
    add_submission(session, event_name, username, submission_name,
                   path_submission)
    all_submissions = get_submissions(session, event_name, None)
    # check that the submissions have been copied
    for sub_id, _, _ in all_submissions:
        sub = get_submission_by_id(session, sub_id)
        assert os.path.exists(sub.path)
        assert os.path.exists(os.path.join(sub.path, 'classifier.py'))

    # `sign_up_team` makes a (sandbox) submission for the user; this
    # submission is the third one.
    assert len(all_submissions) == 3
    # check that the number of submissions for an event was updated
    event = session.query(Event).filter(Event.name == event_name).one_or_none()
    assert event.n_submissions == 1
    submission = get_submission_by_name(session, event_name, username,
                                        submission_name)
    assert submission.name == submission_name
    submission_file = submission.files[0]
    assert submission_file.name == 'classifier'
    assert submission_file.extension == 'py'
    assert (os.path.join('submission_000000005', 'classifier.py')
            in submission_file.path)
Example No. 7
def test_add_submission_create_new_submission(base_db):
    # check that we can make a new submission to the database
    # it will require to have already a team and an event
    session = base_db
    config = read_config(ramp_config_template())
    event_name, username = _setup_sign_up(session, config)
    ramp_config = generate_ramp_config(config)

    submission_name = 'random_forest_10_10'
    path_submission = os.path.join(
        os.path.dirname(ramp_config['ramp_sandbox_dir']), submission_name)
    add_submission(session, event_name, username, submission_name,
                   path_submission)
    all_submissions = get_submissions(session, event_name, None)

    # `sign_up_team` makes a (sandbox) submission for the user; this
    # submission is the third one.
    assert len(all_submissions) == 3
    submission = get_submission_by_name(session, event_name, username,
                                        submission_name)
    assert submission.name == submission_name
    submission_file = submission.files[0]
    assert submission_file.name == 'classifier'
    assert submission_file.extension == 'py'
    assert (os.path.join('submission_000000005', 'classifier.py')
            in submission_file.path)
Example No. 8
def test_unit_test_dispatcher(session_toy):
    # make sure that the number of workers is larger than the number of
    # submissions
    config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())
    dispatcher = Dispatcher(config=config,
                            event_config=event_config,
                            worker=CondaEnvWorker,
                            n_workers=100,
                            hunger_policy='exit')

    # check that all the queues are empty
    assert dispatcher._awaiting_worker_queue.empty()
    assert dispatcher._processing_worker_queue.empty()
    assert dispatcher._processed_submission_queue.empty()

    # check that all submissions are queued
    submissions = get_submissions(session_toy, 'iris_test', 'new')
    dispatcher.fetch_from_db(session_toy)
    # the sandbox (starting kit) submission of each user is skipped, so the
    # queue holds one submission fewer per user
    assert dispatcher._awaiting_worker_queue.qsize() == len(submissions) - 2
    submissions = get_submissions(session_toy, 'iris_test', 'sent_to_training')
    assert len(submissions) == 6

    # start the training
    dispatcher.launch_workers(session_toy)
    # be sure that the training is finished
    while not dispatcher._processing_worker_queue.empty():
        dispatcher.collect_result(session_toy)

    assert len(get_submissions(session_toy, 'iris_test', 'new')) == 2
    assert (len(get_submissions(session_toy, 'iris_test',
                                'training_error')) == 2)
    assert len(get_submissions(session_toy, 'iris_test', 'tested')) == 4

    dispatcher.update_database_results(session_toy)
    assert dispatcher._processed_submission_queue.empty()
    event = get_event(session_toy, 'iris_test')
    assert event.private_leaderboard_html
    assert event.public_leaderboard_html_with_links
    assert event.public_leaderboard_html_no_links
    assert event.failed_leaderboard_html
    assert event.new_leaderboard_html is None
    assert event.public_competition_leaderboard_html
    assert event.private_competition_leaderboard_html
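The `- 2` in the queue-size assertion and the subsequent count of 6 are consistent with one reading of this toy database: 8 submissions start as 'new', and the sandbox submission of each of the 2 users is skipped by `fetch_from_db`. The arithmetic, with numbers inferred from the assertions above:

# numbers inferred from the assertions above, not from the fixture itself
n_new_submissions = 8   # len(get_submissions(..., 'new')) before fetching
n_users = 2             # one skipped sandbox submission per user
assert n_new_submissions - n_users == 6  # the 'sent_to_training' count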
Example No. 9
def test_get_submissions(session_scope_module, state, expected_id):
    submissions = get_submissions(session_scope_module, 'iris_test',
                                  state=state)
    assert len(submissions) == len(expected_id)
    for submission_id, sub_name, sub_path in submissions:
        assert submission_id in expected_id
        assert 'submission_{:09d}'.format(submission_id) == sub_name
        path_file = os.path.join('submission_{:09d}'.format(submission_id),
                                 'classifier.py')
        assert path_file in sub_path[0]
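The naming convention asserted here zero-pads the database id to nine digits, which matches the `submission_000000005` path checked in Examples No. 6 and No. 7:

# zero-padded naming convention asserted in the test above
sub_id = 5
assert 'submission_{:09d}'.format(sub_id) == 'submission_000000005'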
Example No. 10
def test_integration_dispatcher(session_toy):
    config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())
    dispatcher = Dispatcher(config=config,
                            event_config=event_config,
                            worker=CondaEnvWorker,
                            n_workers=-1,
                            hunger_policy='exit')
    dispatcher.launch()

    # the iris kit contains a submission which should fail for each user
    submission = get_submissions(session_toy,
                                 event_config['ramp']['event_name'],
                                 'training_error')
    assert len(submission) == 2
Example No. 11
def test_dispatcher_aws_not_launching(session_toy_aws, caplog):
    # given the test config file the instance should not be able to launch
    # due to authentication error
    # after unsuccessful try the worker should teardown
    config = read_config(database_config_template())
    event_config = read_config(ramp_aws_config_template())

    dispatcher = Dispatcher(config=config,
                            event_config=event_config,
                            worker=AWSWorker,
                            n_workers=10,
                            hunger_policy='exit')
    dispatcher.fetch_from_db(session_toy_aws)
    submissions = get_submissions(session_toy_aws, 'iris_aws_test', 'new')

    dispatcher.launch_workers(session_toy_aws)
    assert 'AuthFailure' in caplog.text
    # training should not have started
    assert 'training' not in caplog.text
    num_running_workers = dispatcher._processing_worker_queue.qsize()
    assert num_running_workers == 0
    submissions2 = get_submissions(session_toy_aws, 'iris_aws_test', 'new')
    # assert that all the submissions are still in the 'new' state
    assert len(submissions) == len(submissions2)
Example No. 12
def test_submit_starting_kits(base_db):
    session = base_db
    config = ramp_config_iris()
    event_name, username = _setup_sign_up(session)
    ramp_config = generate_ramp_config(read_config(config))

    submit_starting_kits(session, event_name, username,
                         ramp_config['ramp_kit_submissions_dir'])

    submissions = get_submissions(session, event_name, None)
    submissions_id = [sub[0] for sub in submissions]
    assert len(submissions) == 5
    expected_submission_name = {'starting_kit', 'starting_kit_test',
                                'random_forest_10_10', 'error'}
    submission_name = {get_submission_by_id(session, sub_id).name
                       for sub_id in submissions_id}
    assert submission_name == expected_submission_name
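Although five submissions are present, the expected set holds only four names; presumably the sandbox submission created at sign-up reuses the 'starting_kit' name, so the set comparison collapses the duplicate. A sketch of that arithmetic (the duplicated name is an assumption, not stated in the test):

# the repeated 'starting_kit' entry is a hypothetical explanation for the
# 5-submissions / 4-names mismatch in the test above
names = ['starting_kit', 'starting_kit', 'starting_kit_test',
         'random_forest_10_10', 'error']
assert len(names) == 5
assert set(names) == {'starting_kit', 'starting_kit_test',
                      'random_forest_10_10', 'error'}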
Example No. 13
def test_dispatcher_timeout(session_toy):
    config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())
    dispatcher = Dispatcher(config=config,
                            event_config=event_config,
                            worker=CondaEnvWorker,
                            n_workers=-1,
                            hunger_policy='exit')
    # override the timeout of the worker
    dispatcher._worker_config["timeout"] = 1
    dispatcher.launch()

    # we should have at least 3 submissions that fail:
    # 2 with errors and 1 with a timeout
    submissions = get_submissions(session_toy,
                                  event_config['ramp']['event_name'],
                                  'training_error')
    assert len(submissions) >= 2
Example No. 14
def test_submit_starting_kits(base_db):
    session = base_db
    config = read_config(ramp_config_template())
    event_name, username = _setup_sign_up(session, config)
    ramp_config = generate_ramp_config(config)

    submit_starting_kits(
        session, event_name, username,
        os.path.join(ramp_config['ramp_kits_dir'], ramp_config['event'],
                     config['ramp']['submissions_dir']))

    submissions = get_submissions(session, event_name, None)
    submissions_id = [sub[0] for sub in submissions]
    assert len(submissions) == 5
    expected_submission_name = {
        'starting_kit', 'starting_kit_test', 'random_forest_10_10', 'error'
    }
    submission_name = set(
        get_submission_by_id(session, sub_id).name
        for sub_id in submissions_id)
    assert submission_name == expected_submission_name
Example No. 15
def launch(self):
    """Launch the dispatcher."""
    logger.info('Starting the RAMP dispatcher')
    with session_scope(self._database_config) as session:
        logger.info('Open a session to the database')
        try:
            while not self._poison_pill:
                self.fetch_from_db(session)
                self.launch_workers(session)
                self.collect_result(session)
                self.update_database_results(session)
        finally:
            # reset the submissions to 'new' in case of error or unfinished
            # training
            submissions = get_submissions(session,
                                          self._ramp_config['event_name'],
                                          state=None)
            for submission_id, _, _ in submissions:
                submission_state = get_submission_state(
                    session, submission_id)
                if submission_state in ('training', 'sent_to_training'):
                    set_submission_state(session, submission_id, 'new')
        logger.info('Dispatcher killed by the poison pill')
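The launch loop runs until `_poison_pill` is set and wraps the body in `try`/`finally` so the cleanup (resetting unfinished submissions) happens even on errors. A toy, self-contained analogue of that control flow (`MiniDispatcher` is illustrative only):

import threading
import time

class MiniDispatcher:
    """Toy analogue of launch() above: loop until the poison pill is set,
    then run cleanup in the finally block."""

    def __init__(self):
        self._poison_pill = False

    def launch(self):
        try:
            while not self._poison_pill:
                time.sleep(0.01)  # stand-in for fetch/launch/collect/update
        finally:
            pass  # stand-in for resetting unfinished submissions to 'new'

dispatcher = MiniDispatcher()
threading.Timer(0.05, lambda: setattr(dispatcher, '_poison_pill', True)).start()
dispatcher.launch()  # returns shortly after the pill is set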
Example No. 16
def test_get_submission_unknown_state(session_scope_module):
    with pytest.raises(UnknownStateError, match='Unrecognized state'):
        get_submissions(session_scope_module, 'iris_test', state='whatever')
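`pytest.raises(..., match=...)` searches the given regular expression in the string form of the raised exception. A self-contained sketch of the same pattern, with `UnknownStateError` re-declared here as a stand-in for the real exception:

import pytest

class UnknownStateError(Exception):
    """Stand-in for the exception type used in the test above."""

def get_submissions_stub(state):
    if state not in ('new', 'tested', None):
        raise UnknownStateError('Unrecognized state : {}'.format(state))

def test_unknown_state_raises():
    # `match` is a regular expression searched in str(exception)
    with pytest.raises(UnknownStateError, match='Unrecognized state'):
        get_submissions_stub('whatever')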
Example No. 17
def test_info_on_training_error(test_launch_ec2_instances, upload_submission,
                                launch_train, is_spot_terminated,
                                training_finished, training_successful,
                                get_log_content, check_instance_status,
                                download_log, session_toy_aws, caplog):
    # make sure that the Python error from the submission is passed to the
    # dispatcher; everything should be mocked as correct output from the AWS
    # instances while setting up the instance and loading the submission
    # mock a dummy AWS instance
    class DummyInstance:
        id = 1

    test_launch_ec2_instances.return_value = (DummyInstance(), ), 0
    upload_submission.return_value = 0
    launch_train.return_value = 0
    is_spot_terminated.return_value = 0
    training_finished.return_value = False
    download_log.return_value = 0

    config = read_config(database_config_template())
    event_config = read_config(ramp_aws_config_template())

    dispatcher = Dispatcher(config=config,
                            event_config=event_config,
                            worker=AWSWorker,
                            n_workers=10,
                            hunger_policy='exit')
    dispatcher.fetch_from_db(session_toy_aws)
    dispatcher.launch_workers(session_toy_aws)
    num_running_workers = dispatcher._processing_worker_queue.qsize()
    # worker, (submission_id, submission_name) = \
    #     dispatcher._processing_worker_queue.get()
    # assert worker.status == 'running'
    submissions = get_submissions(session_toy_aws, 'iris_aws_test', 'training')
    ids = [submissions[idx][0] for idx in range(len(submissions))]
    assert len(submissions) > 1
    assert num_running_workers == len(ids)

    dispatcher.time_between_collection = 0
    training_successful.return_value = False

    # now we will end the submission with training error
    training_finished.return_value = True
    training_error_msg = 'Python error here'
    get_log_content.return_value = training_error_msg
    check_instance_status.return_value = 'finished'

    dispatcher.collect_result(session_toy_aws)

    # the worker we were using should have been torn down
    num_running_workers = dispatcher._processing_worker_queue.qsize()

    assert num_running_workers == 0

    submissions = get_submissions(session_toy_aws, 'iris_aws_test',
                                  'training_error')
    assert len(submissions) == len(ids)

    submission = get_submission_by_id(session_toy_aws, submissions[0][0])
    assert training_error_msg in submission.error_msg
Example No. 18
def score_plot(session, event):
    from bokeh.plotting import figure
    from bokeh.models.sources import ColumnDataSource
    from bokeh.models.formatters import DatetimeTickFormatter

    submissions = get_submissions(session, event.name, None)
    submissions = [
        get_submission_by_id(session, sub_id) for sub_id, _, _ in submissions
        if get_submission_by_id(session, sub_id).is_public_leaderboard
        and get_submission_by_id(session, sub_id).is_valid
    ]
    score_names = [score_type.name for score_type in event.score_types]
    scoress = np.array([[
        score.valid_score_cv_bag
        for score in submission.ordered_scores(score_names)
    ] for submission in submissions]).T

    score_plot_df = pd.DataFrame()
    score_plot_df['submitted at (UTC)'] = [
        submission.submission_timestamp for submission in submissions
    ]
    score_plot_df['contributivity'] = [
        submission.contributivity for submission in submissions
    ]
    score_plot_df['historical contributivity'] = [
        submission.historical_contributivity for submission in submissions
    ]
    for score_name in score_names:  # to make sure the column is created
        score_plot_df[score_name] = 0
    for score_name, scores in zip(score_names, scoress):
        score_plot_df[score_name] = scores

    score_name = event.official_score_name
    score_plot_df = score_plot_df[
        score_plot_df['submitted at (UTC)'] > event.opening_timestamp]
    score_plot_df = score_plot_df.sort_values('submitted at (UTC)')
    score_plot_df = add_pareto(score_plot_df, score_name,
                               event.official_score_type.worst,
                               event.official_score_type.is_lower_the_better)

    is_open = (score_plot_df['submitted at (UTC)'] >
               event.public_opening_timestamp).values

    max_contributivity = max(0.0000001,
                             max(score_plot_df['contributivity'].values))
    max_historical_contributivity = max(
        0.0000001, max(score_plot_df['historical contributivity'].values))

    fill_color_1 = (176, 23, 31)
    fill_color_2 = (16, 78, 139)
    fill_colors_1 = color_gradient(
        fill_color_1,
        score_plot_df['contributivity'].values / max_contributivity)
    fill_colors_2 = color_gradient(
        fill_color_2, score_plot_df['historical contributivity'].values /
        max_historical_contributivity)
    fill_colors = np.minimum(fill_colors_1, fill_colors_2).astype(int)
    fill_colors = ["#%02x%02x%02x" % (c[0], c[1], c[2]) for c in fill_colors]

    score_plot_df['x'] = score_plot_df['submitted at (UTC)']
    score_plot_df['y'] = score_plot_df[score_name]
    score_plot_df['line_color'] = 'royalblue'
    score_plot_df['circle_size'] = 8
    score_plot_df.loc[is_open, 'line_color'] = 'coral'
    score_plot_df['fill_color'] = fill_colors
    score_plot_df['fill_alpha'] = 0.5
    score_plot_df['line_width'] = 0
    score_plot_df['label'] = 'closed phase'
    score_plot_df.loc[is_open, 'label'] = 'open phase'

    source = ColumnDataSource(score_plot_df)
    pareto_df = score_plot_df[score_plot_df[score_name +
                                            ' pareto'] == 1].copy()
    pareto_df = pareto_df.append(pareto_df.iloc[-1])
    pareto_df.iloc[-1,
                   pareto_df.columns.get_loc('x')] = (max(score_plot_df['x']))
    pareto_df = make_step_df(pareto_df,
                             event.official_score_type.is_lower_the_better)
    source_pareto = ColumnDataSource(pareto_df)

    tools = ['pan,wheel_zoom,box_zoom,reset,save,tap']
    p = figure(plot_width=900, plot_height=600, tools=tools, title='Scores')

    p.circle('x',
             'y',
             size='circle_size',
             line_color='line_color',
             fill_color='fill_color',
             fill_alpha='fill_alpha',
             line_width=1,
             source=source,
             legend='label')
    p.line('x',
           'y',
           line_width=3,
           line_color='goldenrod',
           source=source_pareto,
           legend='best score',
           alpha=0.9)

    p.xaxis.formatter = DatetimeTickFormatter(
        hours=['%d %B %Y'],
        days=['%d %B %Y'],
        months=['%d %B %Y'],
        years=['%d %B %Y'],
    )
    p.xaxis.major_label_orientation = np.pi / 4

    if event.official_score_type.is_lower_the_better:
        p.yaxis.axis_label = score_name + ' (the lower the better)'
        p.legend.location = 'top_right'
    else:
        p.yaxis.axis_label = score_name + ' (the greater the better)'
        p.legend.location = 'bottom_right'
    p.xaxis.axis_label = 'submission timestamp (UTC)'
    p.xaxis.axis_label_text_font_size = '14pt'
    p.yaxis.axis_label_text_font_size = '14pt'
    p.legend.label_text_font_size = '14pt'
    p.title.text_font_size = '16pt'
    p.xaxis.major_label_text_font_size = '10pt'
    p.yaxis.major_label_text_font_size = '10pt'
    return p
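The fill colors in `score_plot` are plain `'#RRGGBB'` strings built from RGB triples with `%02x` formatting; for instance, `fill_color_1` above renders as `'#b0171f'`:

# hex formatting used for fill_colors in the function above
rgb = (176, 23, 31)  # fill_color_1
assert "#%02x%02x%02x" % rgb == '#b0171f'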
Example No. 19
def _reset_submission_after_failure(session, event_name):
    submissions = get_submissions(session, event_name, state=None)
    for submission_id, _, _ in submissions:
        submission_state = get_submission_state(session, submission_id)
        if submission_state in ('training', 'sent_to_training'):
            set_submission_state(session, submission_id, 'new')