Пример #1
0
def experiments_group_schedule_deletion(experiment_group_id, immediate=False):
    """Archive an experiment group and dispatch the tasks that finish its removal.

    The group is looked up with ``include_deleted=True``.  When the lookup
    returns nothing, the function does nothing.  Otherwise the group is
    archived, a stop task is sent if the group reports itself as stoppable,
    and, when ``immediate`` is truthy, a delete-archived-group task is sent
    as well.

    Args:
        experiment_group_id: Identifier handed to the lookup and to both tasks.
        immediate: When truthy, also dispatch deletion of the archived group.
    """
    group = get_valid_experiment_group(experiment_group_id=experiment_group_id,
                                       include_deleted=True)
    if not group:
        # Nothing was found for this id, so there is nothing to archive.
        return

    group.archive()

    # `is_stoppable` is read after archiving, exactly as the stop decision
    # has always been made.
    if group.is_stoppable:
        stop_kwargs = {
            'experiment_group_id': experiment_group_id,
            'collect_logs': False,
            'message': 'Experiment Group is scheduled for deletion.',
        }
        workers.send(SchedulerCeleryTasks.EXPERIMENTS_GROUP_STOP,
                     kwargs=stop_kwargs,
                     countdown=conf.get(SCHEDULER_GLOBAL_COUNTDOWN))

    if immediate:
        # The deletion uses the "delayed" countdown setting rather than the
        # one used for the stop task.
        workers.send(SchedulerCeleryTasks.DELETE_ARCHIVED_EXPERIMENT_GROUP,
                     kwargs={'group_id': experiment_group_id},
                     countdown=conf.get(SCHEDULER_GLOBAL_COUNTDOWN_DELAYED))
Пример #2
0
def experiments_group_schedule_deletion(experiment_group_id, immediate=False):
    """Archive an experiment group and queue the tasks that complete its deletion.

    The group is fetched with ``include_deleted=True``; a falsy result makes
    this a no-op.  After archiving, a task that stops the group's experiments
    is sent when the group is running, and a task that deletes the archived
    group is sent when ``immediate`` is truthy.

    Args:
        experiment_group_id: Identifier used for the lookup and both tasks.
        immediate: When truthy, also queue deletion of the archived group.
    """
    group = get_valid_experiment_group(
        experiment_group_id=experiment_group_id, include_deleted=True)
    if not group:
        # No group to act on.
        return

    group.archive()

    if group.is_running:
        # `pending=False` asks the stop task to handle the non-pending path.
        stop_kwargs = {
            'experiment_group_id': experiment_group_id,
            'pending': False,
            'collect_logs': False,
            'message': 'Experiment Group is scheduled for deletion.',
        }
        celery_app.send_task(
            SchedulerCeleryTasks.EXPERIMENTS_GROUP_STOP_EXPERIMENTS,
            kwargs=stop_kwargs)

    if immediate:
        celery_app.send_task(
            SchedulerCeleryTasks.DELETE_ARCHIVED_EXPERIMENT_GROUP,
            kwargs={'group_id': experiment_group_id})
Пример #3
0
def experiments_group_check_finished(self, experiment_group_id):
    """Mark an experiment group as succeeded once all its experiments are done.

    While the group still has experiments that are not done, the task asks to
    be retried after ``Intervals.EXPERIMENTS_SCHEDULER`` and leaves the status
    untouched.

    Args:
        self: The bound task instance; only its ``retry`` method is used.
        experiment_group_id: Identifier of the group to check.
    """
    experiment_group = get_valid_experiment_group(
        experiment_group_id=experiment_group_id)
    if not experiment_group:
        # The lookup can come back empty (e.g. the group no longer exists);
        # without this guard the attribute access below would fail on it.
        return

    if experiment_group.non_done_experiments.exists():
        # Some experiments are still in progress: check again later.
        self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)
        return

    experiment_group.set_status(status=ExperimentGroupLifeCycle.SUCCEEDED)
Пример #4
0
def experiments_group_check_finished(self, experiment_group_id):
    """Set the group's status to SUCCEEDED when none of its experiments remain undone.

    Nothing happens when the group cannot be found or already reports itself
    as done.  If unfinished experiments remain, the task requests a retry
    after ``Intervals.EXPERIMENTS_SCHEDULER`` instead of changing the status.

    Args:
        self: The bound task instance; only its ``retry`` method is used.
        experiment_group_id: Identifier of the group to check.
    """
    group = get_valid_experiment_group(experiment_group_id=experiment_group_id)
    if not group or group.is_done:
        # Missing or already finished: nothing left to check.
        return

    if not group.non_done_experiments.exists():
        group.set_status(status=ExperimentGroupLifeCycle.SUCCEEDED)
        return

    # Experiments are still pending or running; look again later.
    self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)
Пример #5
0
def experiments_group_check_done(self, experiment_group_id, auto_retry=False):
    """Set the group's status to DONE when it has no unfinished experiments.

    Nothing happens when the group cannot be found or already reports itself
    as done.  If unfinished experiments remain, the status is left alone and
    a retry is requested only when ``auto_retry`` is truthy.

    Args:
        self: The bound task instance; only its ``retry`` method is used.
        experiment_group_id: Identifier of the group to check.
        auto_retry: When truthy, request a retry after
            ``Intervals.EXPERIMENTS_SCHEDULER`` while experiments remain.
    """
    group = get_valid_experiment_group(experiment_group_id=experiment_group_id)
    if not group or group.is_done:
        # Missing or already finished: nothing left to check.
        return

    has_unfinished = group.non_done_experiments.exists()
    if not has_unfinished:
        group.set_status(status=ExperimentGroupLifeCycle.DONE)
    elif auto_retry:
        self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)
Пример #6
0
def _get_group_or_retry(experiment_group_id, task):
    """Return the experiment group, asking ``task`` to retry while it is missing.

    Args:
        experiment_group_id: Identifier of the group to fetch.
        task: Task object exposing ``request.retries`` and ``retry``.

    Returns:
        The group when the lookup succeeds, otherwise ``None``.
    """
    group = get_valid_experiment_group(experiment_group_id=experiment_group_id)
    if not group:
        # The group may not be visible yet, so a retry is requested while
        # fewer than two retries have been used.
        # NOTE(review): if `task.retry` raises (Celery's default - confirm),
        # the lines after it are only reached once retries are exhausted.
        if task.request.retries < 2:
            _logger.info('Trying again for ExperimentGroup `%s`.', experiment_group_id)
            task.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)

        _logger.info('Something went wrong, '
                     'the ExperimentGroup `%s` does not exist anymore.', experiment_group_id)
        return None

    return group
Пример #7
0
def _get_group_or_retry(experiment_group_id, task):
    """Fetch an experiment group, requesting a task retry when it is not found.

    Args:
        experiment_group_id: Identifier of the group to look up.
        task: Task object whose ``request.retries`` counter and ``retry``
            method are used.

    Returns:
        The group if the lookup yields one, ``None`` otherwise.
    """
    found = get_valid_experiment_group(experiment_group_id=experiment_group_id)
    if found:
        return found

    retries_so_far = task.request.retries
    if retries_so_far < 2:
        # Give the group a chance to show up before giving up on it.
        _logger.info('Trying again for ExperimentGroup `%s`.', experiment_group_id)
        task.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)

    _logger.info('Something went wrong, '
                 'the ExperimentGroup `%s` does not exist anymore.', experiment_group_id)
    return None
Пример #8
0
def experiments_group_stop_experiments(experiment_group_id,
                                       pending,
                                       collect_logs=True,
                                       message=None):
    """Stop the experiments of a group and mark the group itself as STOPPED.

    The group is fetched with ``include_deleted=True``; a falsy result makes
    this a no-op.

    Args:
        experiment_group_id: Identifier of the group whose experiments to stop.
        pending: When truthy, only the group's pending experiments are marked
            STOPPED.  Otherwise every experiment whose status is not in
            ``ExperimentLifeCycle.DONE_STATUS`` is handled: stoppable ones get
            a stop task, the others are marked STOPPED directly.
        collect_logs: Forwarded to each stop task as ``collect_logs``.
        message: Status message recorded on the experiments marked STOPPED
            here and on the group.
    """
    group = get_valid_experiment_group(experiment_group_id=experiment_group_id,
                                       include_deleted=True)
    if not group:
        return

    if pending:
        # NOTE (original author): this won't work for archived groups anyway.
        for pending_xp in group.pending_experiments:
            # Record the stop on the experiment itself.
            pending_xp.set_status(status=ExperimentLifeCycle.STOPPED, message=message)
    else:
        not_done = group.all_experiments.exclude(
            status__status__in=ExperimentLifeCycle.DONE_STATUS)
        for xp in not_done.distinct():
            if not xp.is_stoppable:
                # No stop task is sent for this one; just record the status.
                xp.set_status(status=ExperimentLifeCycle.STOPPED, message=message)
                continue

            stop_kwargs = {
                'project_name': xp.project.unique_name,
                'project_uuid': xp.project.uuid.hex,
                'experiment_name': xp.unique_name,
                'experiment_uuid': xp.uuid.hex,
                'experiment_group_name': group.unique_name,
                'experiment_group_uuid': group.uuid.hex,
                'specification': xp.config,
                'update_status': True,
                'collect_logs': collect_logs,
            }
            celery_app.send_task(SchedulerCeleryTasks.EXPERIMENTS_STOP,
                                 kwargs=stop_kwargs)

    # The group is marked STOPPED on both paths.
    group.set_status(ExperimentGroupLifeCycle.STOPPED, message=message)