Example #1
def test_submission_model_set_error(session_scope_module):
    submission = get_submission_by_id(session_scope_module, 5)
    error = 'training_error'
    error_msg = 'simulate an error'
    submission.set_error(error, error_msg)
    assert submission.state == error
    assert submission.error_msg == error_msg
    for cv_fold in submission.on_cv_folds:
        assert cv_fold.state == error
        assert cv_fold.error_msg == error_msg
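These excerpts omit their imports; a minimal sketch of what the submission-model tests above presumably pull in (the module path is an assumption based on the ramp-board layout, not shown in the example):

# Assumed import for the test above; the exact module path is a guess and
# may differ in the actual test file.
from ramp_database.tools.submission import get_submission_by_id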
Example #2
def test_submission_model_set_contributivity(session_scope_module, state,
                                             expected_contributivity):
    submission = get_submission_by_id(session_scope_module, 5)
    # set the state of the submission such that the contributivity is
    # computed (or not)
    submission.set_state(state)
    # set the fold contributivity to non-default
    for cv_fold in submission.on_cv_folds:
        cv_fold.contributivity = 0.3
    submission.set_contributivity()
    assert submission.contributivity == pytest.approx(expected_contributivity)
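The state and expected_contributivity arguments suggest the test is parametrized; a minimal sketch of the omitted decorator, where the listed state/value pairs are assumptions:

import pytest

# Hypothetical parametrization; the actual states and expected values are
# not shown in the excerpt above.
@pytest.mark.parametrize(
    "state, expected_contributivity",
    [("scored", 0.3), ("training_error", 0.0)],
)
def test_submission_model_set_contributivity(session_scope_module, state,
                                             expected_contributivity):
    ...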
Example #3
def test_submission_model_set_state(session_scope_module):
    submission = get_submission_by_id(session_scope_module, 5)
    submission.set_state('sent_to_training')
    assert submission.state == 'sent_to_training'
    assert (submission.sent_to_training_timestamp -
            datetime.utcnow()).total_seconds() < 10

    submission.set_state('training')
    assert submission.state == 'training'
    assert (submission.training_timestamp -
            datetime.utcnow()).total_seconds() < 10

    submission.set_state('scored')
    assert submission.state == 'scored'
    for cv_fold in submission.on_cv_folds:
        assert cv_fold.state == 'scored'
Example #4
def test_integration_dispatcher(session_toy):
    config = read_config(database_config_template())
    event_config = read_config(ramp_config_template())
    dispatcher = Dispatcher(config=config,
                            event_config=event_config,
                            worker=CondaEnvWorker,
                            n_workers=-1,
                            hunger_policy='exit')
    dispatcher.launch()

    # the iris kit contains a submission which should fail for each user
    submissions = get_submissions(session_toy,
                                  event_config['ramp']['event_name'],
                                  'training_error')
    assert len(submissions) == 2
    submission = get_submission_by_id(session_toy, submissions[0][0])
    assert 'ValueError' in submission.error_msg
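This integration test presumably relies on the dispatcher, worker, config and database helpers from the ramp packages; a minimal sketch of the imports it would need, where the module paths are assumptions:

# Assumed imports for the dispatcher integration test; the paths are a guess
# based on the ramp-engine / ramp-utils / ramp-database package layout.
from ramp_engine.dispatcher import Dispatcher
from ramp_engine.local import CondaEnvWorker
from ramp_utils import read_config
from ramp_utils.testing import database_config_template, ramp_config_template
from ramp_database.tools.submission import get_submission_by_id, get_submissions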
Example #5
def test_submit_starting_kits(base_db):
    session = base_db
    config = ramp_config_iris()
    event_name, username = _setup_sign_up(session)
    ramp_config = generate_ramp_config(read_config(config))

    submit_starting_kits(session, event_name, username,
                         ramp_config['ramp_kit_submissions_dir'])

    submissions = get_submissions(session, event_name, None)
    submissions_id = [sub[0] for sub in submissions]
    assert len(submissions) == 5
    expected_submission_name = {'starting_kit', 'starting_kit_test',
                                'random_forest_10_10', 'error'}
    submission_name = {get_submission_by_id(session, sub_id).name
                       for sub_id in submissions_id}
    assert submission_name == expected_submission_name
Example #6
def test_submission_model_property(session_scope_module):
    # check the properties of the Submission model
    submission = get_submission_by_id(session_scope_module, 5)
    assert re.match(r'Submission\(iris_test/test_user/.*\)', str(submission))
    assert re.match(r'Submission\(event_name.*\)', repr(submission))

    assert isinstance(submission.team, Team)
    assert isinstance(submission.event, Event)
    assert submission.official_score_name == 'acc'
    assert isinstance(submission.official_score, SubmissionScore)
    assert all([
        isinstance(score, EventScoreType) for score in submission.score_types
    ])
    assert issubclass(submission.Predictions, BasePrediction)
    assert submission.is_not_sandbox is True
    assert submission.is_error is False
    assert submission.is_public_leaderboard is False
    assert submission.is_private_leaderboard is False
    assert (os.path.join('submissions', 'submission_000000005')
            in submission.path)
    assert submission.basename == 'submission_000000005'
    assert "submissions.submission_000000005" in submission.module
    assert len(submission.f_names) == 1
    assert submission.f_names[0] == 'estimator.py'
    assert submission.link == '/' + os.path.join(submission.hash_,
                                                 'estimator.py')
    assert re.match(
        '<a href={}>{}/{}/{}</a>'.format(submission.link,
                                         submission.event.name,
                                         submission.team.name,
                                         submission.name),
        submission.full_name_with_link)
    assert re.match(
        '<a href={}>{}</a>'.format(submission.link, submission.name),
        submission.name_with_link)
    assert re.match(
        '<a href=.*{}.*error.txt>{}</a>'.format(submission.hash_,
                                                submission.state),
        submission.state_with_link)

    for score in submission.ordered_scores(score_names=['acc', 'error']):
        assert isinstance(score, SubmissionScore)
Example #7
    def launch_workers(self, session):
        """Launch the awaiting workers if possible."""
        while (not self._processing_worker_queue.full()
               and not self._awaiting_worker_queue.empty()):
            worker, (submission_id, submission_name) = \
                self._awaiting_worker_queue.get()
            self._logger.info(f'Starting worker: {worker}')

            try:
                worker.setup()
                if worker.status != "error":
                    worker.launch_submission()
            except Exception as e:
                self._logger.error(
                    f'Worker finished with unhandled exception:\n {e}')
                worker.status = 'error'
            if worker.status == 'error':
                set_submission_state(session, submission_id, 'checking_error')
                worker.teardown()  # kill the worker
                self._logger.info(f'Worker {worker} killed due to an error '
                                  f'while connecting to AWS worker')
                stderr = ("There was a problem with sending your submission"
                          " for training. This problem is on RAMP side"
                          " and most likely it is not related to your"
                          " code. If this happened for the first time"
                          " to this submission you might"
                          " consider submitting the same code once again."
                          " Else, please contact the event organizers.")
                set_submission_error_msg(session, submission_id, stderr)
                continue
            set_submission_state(session, submission_id, 'training')
            submission = get_submission_by_id(session, submission_id)
            update_user_leaderboards(
                session,
                self._ramp_config['event_name'],
                submission.team.name,
                new_only=True,
            )
            self._processing_worker_queue.put_nowait(
                (worker, (submission_id, submission_name)))
            self._logger.info(
                f'Store the worker {worker} into the processing queue')
Example #8
    def fetch_from_db(self, session):
        """Fetch the new submissions from the database and create the workers."""
        submissions = get_submissions(session,
                                      self._ramp_config['event_name'],
                                      state='new')
        if not submissions:
            logger.info('No new submissions fetched from the database')
            return
        for submission_id, submission_name, _ in submissions:
            # do not train the sandbox submission
            submission = get_submission_by_id(session, submission_id)
            if not submission.is_not_sandbox:
                continue
            # create the worker
            worker = self.worker(self._worker_config, submission_name)
            set_submission_state(session, submission_id, 'sent_to_training')
            self._awaiting_worker_queue.put_nowait(
                (worker, (submission_id, submission_name)))
            logger.info('Submission {} added to the queue of submissions to '
                        'be processed'.format(submission_name))
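fetch_from_db and launch_workers only feed the two worker queues; a rough sketch of a driver loop that could tie them together with collect_result (the method signature and stopping condition are assumptions, not taken from the excerpts):

    # Hypothetical driver loop: fetch new submissions, launch workers, collect
    # results, and stop once both queues are empty (e.g. hunger_policy='exit').
    def launch(self, session):
        while True:
            self.fetch_from_db(session)
            self.launch_workers(session)
            self.collect_result(session)
            if (self._awaiting_worker_queue.empty()
                    and self._processing_worker_queue.empty()):
                break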
Example #9
def test_submit_starting_kits(base_db):
    session = base_db
    config = read_config(ramp_config_template())
    event_name, username = _setup_sign_up(session, config)
    ramp_config = generate_ramp_config(config)

    submit_starting_kits(
        session, event_name, username,
        os.path.join(ramp_config['ramp_kits_dir'], ramp_config['event'],
                     config['ramp']['submissions_dir']))

    submissions = get_submissions(session, event_name, None)
    submissions_id = [sub[0] for sub in submissions]
    assert len(submissions) == 5
    expected_submission_name = {
        'starting_kit', 'starting_kit_test', 'random_forest_10_10', 'error'
    }
    submission_name = set(
        get_submission_by_id(session, sub_id).name
        for sub_id in submissions_id)
    assert submission_name == expected_submission_name
Example #10
def test_submission_model_reset(session_scope_module):
    submission = get_submission_by_id(session_scope_module, 5)
    for score in submission.ordered_scores(score_names=['acc', 'error']):
        assert isinstance(score, SubmissionScore)
        # set the score to later test the reset function
        score.valid_score_cv_bag = 1.0
        score.test_score_cv_bag = 1.0
        score.valid_score_cv_bags = np.ones(2)
        score.test_score_cv_bags = np.ones(2)
    # set to non-default the variable that should change with reset
    submission.error_msg = 'simulate an error'
    submission.contributivity = 100.
    submission.reset()
    assert submission.contributivity == pytest.approx(0)
    assert submission.state == 'new'
    assert submission.error_msg == ''
    for score, worse_score in zip(submission.ordered_scores(['acc', 'error']),
                                  [0, 1]):
        assert score.valid_score_cv_bag == pytest.approx(worse_score)
        assert score.test_score_cv_bag == pytest.approx(worse_score)
        assert score.valid_score_cv_bags is None
        assert score.test_score_cv_bags is None
Example #11
def test_info_on_training_error(test_launch_ec2_instances, upload_submission,
                                launch_train, is_spot_terminated,
                                training_finished, training_successful,
                                get_log_content, check_instance_status,
                                download_log, session_toy_aws, caplog):
    # make sure that the Python error from the solution is passed to the
    # dispatcher
    # everything should be mocked as correct output from AWS instances
    # on setting up the instance and loading the submission
    # mock dummy AWS instance
    class DummyInstance:
        id = 1

    test_launch_ec2_instances.return_value = (DummyInstance(), ), 0
    upload_submission.return_value = 0
    launch_train.return_value = 0
    is_spot_terminated.return_value = 0
    training_finished.return_value = False
    download_log.return_value = 0

    config = read_config(database_config_template())
    event_config = read_config(ramp_aws_config_template())

    dispatcher = Dispatcher(config=config,
                            event_config=event_config,
                            worker=AWSWorker,
                            n_workers=10,
                            hunger_policy='exit')
    dispatcher.fetch_from_db(session_toy_aws)
    dispatcher.launch_workers(session_toy_aws)
    num_running_workers = dispatcher._processing_worker_queue.qsize()
    # worker, (submission_id, submission_name) = \
    #     dispatcher._processing_worker_queue.get()
    # assert worker.status == 'running'
    submissions = get_submissions(session_toy_aws, 'iris_aws_test', 'training')
    ids = [submissions[idx][0] for idx in range(len(submissions))]
    assert len(submissions) > 1
    assert num_running_workers == len(ids)

    dispatcher.time_between_collection = 0
    training_successful.return_value = False

    # now we will end the submission with training error
    training_finished.return_value = True
    training_error_msg = 'Python error here'
    get_log_content.return_value = training_error_msg
    check_instance_status.return_value = 'finished'

    dispatcher.collect_result(session_toy_aws)

    # the worker which we were using should have been torn down
    num_running_workers = dispatcher._processing_worker_queue.qsize()

    assert num_running_workers == 0

    submissions = get_submissions(session_toy_aws, 'iris_aws_test',
                                  'training_error')
    assert len(submissions) == len(ids)

    submission = get_submission_by_id(session_toy_aws, submissions[0][0])
    assert training_error_msg in submission.error_msg
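The long argument list implies the AWS helpers are patched with stacked mock.patch decorators, where the innermost (bottom) decorator maps to the first argument; a hedged sketch of that setup, with the patch targets being assumptions:

from unittest import mock

# Hypothetical patch setup; the real test presumably patches the AWS helpers
# used by AWSWorker, but the exact targets are not shown in the excerpt.
@mock.patch('ramp_engine.aws.api.download_log')
# ... one @mock.patch per mocked argument, in reverse argument order ...
@mock.patch('ramp_engine.aws.api.launch_ec2_instances')
def test_info_on_training_error(test_launch_ec2_instances, download_log,
                                session_toy_aws, caplog):
    ...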
Example #12
def test_submission_model_set_state(session_scope_module):
    submission = get_submission_by_id(session_scope_module, 5)
    submission.set_state('scored')
    assert submission.state == 'scored'
    for cv_fold in submission.on_cv_folds:
        assert cv_fold.state == 'scored'
Example #13
def score_plot(session, event):
    from bokeh.plotting import figure
    from bokeh.models.sources import ColumnDataSource
    from bokeh.models.formatters import DatetimeTickFormatter

    submissions = get_submissions(session, event.name, None)
    submissions = [
        get_submission_by_id(session, sub_id) for sub_id, _, _ in submissions
        if get_submission_by_id(session, sub_id).is_public_leaderboard
        and get_submission_by_id(session, sub_id).is_valid
    ]
    score_names = [score_type.name for score_type in event.score_types]
    scoress = np.array([[
        score.valid_score_cv_bag
        for score in submission.ordered_scores(score_names)
    ] for submission in submissions]).T

    score_plot_df = pd.DataFrame()
    score_plot_df['submitted at (UTC)'] = [
        submission.submission_timestamp for submission in submissions
    ]
    score_plot_df['contributivity'] = [
        submission.contributivity for submission in submissions
    ]
    score_plot_df['historical contributivity'] = [
        submission.historical_contributivity for submission in submissions
    ]
    for score_name in score_names:  # to make sure the column is created
        score_plot_df[score_name] = 0
    for score_name, scores in zip(score_names, scoress):
        score_plot_df[score_name] = scores

    score_name = event.official_score_name
    score_plot_df = score_plot_df[
        score_plot_df['submitted at (UTC)'] > event.opening_timestamp]
    score_plot_df = score_plot_df.sort_values('submitted at (UTC)')
    score_plot_df = add_pareto(score_plot_df, score_name,
                               event.official_score_type.worst,
                               event.official_score_type.is_lower_the_better)

    is_open = (score_plot_df['submitted at (UTC)'] >
               event.public_opening_timestamp).values

    max_contributivity = max(0.0000001,
                             max(score_plot_df['contributivity'].values))
    max_historical_contributivity = max(
        0.0000001, max(score_plot_df['historical contributivity'].values))

    fill_color_1 = (176, 23, 31)
    fill_color_2 = (16, 78, 139)
    fill_colors_1 = color_gradient(
        fill_color_1,
        score_plot_df['contributivity'].values / max_contributivity)
    fill_colors_2 = color_gradient(
        fill_color_2, score_plot_df['historical contributivity'].values /
        max_historical_contributivity)
    fill_colors = np.minimum(fill_colors_1, fill_colors_2).astype(int)
    fill_colors = ["#%02x%02x%02x" % (c[0], c[1], c[2]) for c in fill_colors]

    score_plot_df['x'] = score_plot_df['submitted at (UTC)']
    score_plot_df['y'] = score_plot_df[score_name]
    score_plot_df['line_color'] = 'royalblue'
    score_plot_df['circle_size'] = 8
    score_plot_df.loc[is_open, 'line_color'] = 'coral'
    score_plot_df['fill_color'] = fill_colors
    score_plot_df['fill_alpha'] = 0.5
    score_plot_df['line_width'] = 0
    score_plot_df['label'] = 'closed phase'
    score_plot_df.loc[is_open, 'label'] = 'open phase'

    source = ColumnDataSource(score_plot_df)
    pareto_df = score_plot_df[score_plot_df[score_name +
                                            ' pareto'] == 1].copy()
    pareto_df = pareto_df.append(pareto_df.iloc[-1])
    pareto_df.iloc[-1,
                   pareto_df.columns.get_loc('x')] = (max(score_plot_df['x']))
    pareto_df = make_step_df(pareto_df,
                             event.official_score_type.is_lower_the_better)
    source_pareto = ColumnDataSource(pareto_df)

    tools = ['pan,wheel_zoom,box_zoom,reset,save,tap']
    p = figure(plot_width=900, plot_height=600, tools=tools, title='Scores')

    p.circle('x',
             'y',
             size='circle_size',
             line_color='line_color',
             fill_color='fill_color',
             fill_alpha='fill_alpha',
             line_width=1,
             source=source,
             legend='label')
    p.line('x',
           'y',
           line_width=3,
           line_color='goldenrod',
           source=source_pareto,
           legend='best score',
           alpha=0.9)

    p.xaxis.formatter = DatetimeTickFormatter(
        hours=['%d %B %Y'],
        days=['%d %B %Y'],
        months=['%d %B %Y'],
        years=['%d %B %Y'],
    )
    p.xaxis.major_label_orientation = np.pi / 4

    if event.official_score_type.is_lower_the_better:
        p.yaxis.axis_label = score_name + ' (the lower the better)'
        p.legend.location = 'top_right'
    else:
        p.yaxis.axis_label = score_name + ' (the greater the better)'
        p.legend.location = 'bottom_right'
    p.xaxis.axis_label = 'submission timestamp (UTC)'
    p.xaxis.axis_label_text_font_size = '14pt'
    p.yaxis.axis_label_text_font_size = '14pt'
    p.legend.label_text_font_size = '14pt'
    p.title.text_font_size = '16pt'
    p.xaxis.major_label_text_font_size = '10pt'
    p.yaxis.major_label_text_font_size = '10pt'
    return p
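A hedged usage sketch for score_plot; the session and event helpers below are assumptions used for illustration, not taken from the example:

# Hypothetical usage: build and display the score plot for one event.
from bokeh.io import show
from ramp_database.tools.event import get_event   # assumed module path
from ramp_database.utils import session_scope     # assumed module path

with session_scope(database_config) as session:   # database_config: placeholder
    event = get_event(session, 'iris_test')
    show(score_plot(session, event))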
Example #14
def test_get_submission_by_id(session_scope_module):
    submission = get_submission_by_id(session_scope_module, 1)
    assert isinstance(submission, Submission)
    assert submission.basename == 'submission_000000001'
    assert os.path.exists(os.path.join(submission.path, 'classifier.py'))
    assert submission.state == 'trained'