Exemplo n.º 1
0
def build_experiment(self, experiment_id):
    experiment = get_valid_experiment(experiment_id=experiment_id)
    if not experiment:
        if self.request.retries < 2:
            _logger.info('Trying again for Experiment `%s`.', experiment_id)
            self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)

        _logger.info(
            'Something went wrong, '
            'the Experiment `%s` does not exist anymore.', experiment_id)
        return

    # No need to build the image, start the experiment directly
    if not (experiment.specification.build and experiment.specification.run):
        celery_app.send_task(SchedulerCeleryTasks.EXPERIMENTS_START,
                             kwargs={'experiment_id': experiment_id})
        return

    if not ExperimentLifeCycle.can_transition(
            status_from=experiment.last_status,
            status_to=ExperimentLifeCycle.BUILDING):
        _logger.info('Experiment id `%s` cannot transition from `%s` to `%s`.',
                     experiment_id, experiment.last_status,
                     ExperimentLifeCycle.BUILDING)
        return None

    # Update experiment status to show that its building
    experiment.set_status(ExperimentLifeCycle.BUILDING)

    # Building the docker image
    try:
        status = experiments_builder.build_experiment(experiment)
    except DockerException as e:
        _logger.warning('Failed to build experiment %s', e)
        experiment.set_status(ExperimentLifeCycle.FAILED,
                              message='Failed to build image for experiment.')
        return
    except Repo.DoesNotExist:
        _logger.warning('No code was found for this project')
        experiment.set_status(
            ExperimentLifeCycle.FAILED,
            message='No code was found for to build this experiment.')
        return
    except Exception as e:  # Other exceptions
        _logger.error(
            'Failed to build experiment, unexpected error occurred.\n%s',
            traceback.format_exc())
        experiment.set_status(ExperimentLifeCycle.FAILED,
                              message='Failed to build image for experiment.')
        return

    if not status:
        experiment.set_status(ExperimentLifeCycle.FAILED,
                              message='Failed to build image for experiment.')
        return

    # Now we can start the experiment
    celery_app.send_task(SchedulerCeleryTasks.EXPERIMENTS_START,
                         kwargs={'experiment_id': experiment_id})
Exemplo n.º 2
0
 def set_status(self, status, message=None, traceback=None, **kwargs):
     if status in ExperimentLifeCycle.HEARTBEAT_STATUS:
         RedisHeartBeat.experiment_ping(self.id)
     if ExperimentLifeCycle.can_transition(status_from=self.last_status,
                                           status_to=status):
         ExperimentStatus.objects.create(experiment=self,
                                         status=status,
                                         message=message,
                                         traceback=traceback)
Exemplo n.º 3
0
 def set_status(self, status, created_at=None, message=None, traceback=None, **kwargs):
     if status in ExperimentLifeCycle.HEARTBEAT_STATUS:
         RedisHeartBeat.experiment_ping(self.id)
     last_status = self.last_status_before(status_date=created_at)
     if ExperimentLifeCycle.can_transition(status_from=last_status, status_to=status):
         params = {'created_at': created_at} if created_at else {}
         ExperimentStatus.objects.create(experiment=self,
                                         status=status,
                                         message=message,
                                         traceback=traceback,
                                         **params)
Exemplo n.º 4
0
def build_experiment(self, experiment_id):
    experiment = get_valid_experiment(experiment_id=experiment_id)
    if not experiment:
        if self.request.retries < 2:
            _logger.info('Trying again for Experiment `%s`.', experiment_id)
            self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)

        _logger.info('Something went wrong, '
                     'the Experiment `%s` does not exist anymore.', experiment_id)
        return

    # No need to build the image, start the experiment directly
    if not (experiment.specification.build and experiment.specification.run):
        celery_app.send_task(
            SchedulerCeleryTasks.EXPERIMENTS_START,
            kwargs={'experiment_id': experiment_id})
        return

    if not ExperimentLifeCycle.can_transition(status_from=experiment.last_status,
                                              status_to=ExperimentLifeCycle.BUILDING):
        _logger.info('Experiment id `%s` cannot transition from `%s` to `%s`.',
                     experiment_id, experiment.last_status, ExperimentLifeCycle.BUILDING)
        return None

    # Update experiment status to show that its building
    experiment.set_status(ExperimentLifeCycle.BUILDING)

    # Building the docker image
    try:
        status = experiments_builder.build_experiment(experiment)
    except DockerException as e:
        _logger.warning('Failed to build experiment %s', e)
        experiment.set_status(ExperimentLifeCycle.FAILED,
                              message='Failed to build image for experiment.')
        return
    except Repo.DoesNotExist:
        _logger.warning('No code was found for this project')
        experiment.set_status(ExperimentLifeCycle.FAILED,
                              message='No code was found for to build this experiment.')
        return
    except Exception as e:  # Other exceptions
        _logger.warning('Failed to build experiment %s', e)
        experiment.set_status(ExperimentLifeCycle.FAILED,
                              message='Failed to build image for experiment.')
        return

    if not status:
        return

    # Now we can start the experiment
    celery_app.send_task(
        SchedulerCeleryTasks.EXPERIMENTS_START,
        kwargs={'experiment_id': experiment_id})
Exemplo n.º 5
0
def experiments_start(experiment_id):
    experiment = get_valid_experiment(experiment_id=experiment_id)
    if not experiment:
        _logger.info('Something went wrong, '
                     'the Experiment `%s` does not exist anymore.', experiment_id)
        return

    if not ExperimentLifeCycle.can_transition(status_from=experiment.last_status,
                                              status_to=ExperimentLifeCycle.SCHEDULED):
        _logger.info('Experiment `%s` cannot transition from `%s` to `%s`.',
                     experiment.unique_name, experiment.last_status, ExperimentLifeCycle.SCHEDULED)
        return None

    experiment_scheduler.start_experiment(experiment)
Exemplo n.º 6
0
def experiments_build(experiment_id):
    experiment = get_valid_experiment(experiment_id=experiment_id)
    if not experiment:
        return

    # No need to build the image, start the experiment directly
    if not (experiment.specification.build and experiment.specification.run):
        celery_app.send_task(SchedulerCeleryTasks.EXPERIMENTS_START,
                             kwargs={'experiment_id': experiment_id},
                             countdown=conf.get('GLOBAL_COUNTDOWN'))
        return

    last_status = experiment.last_status
    if not ExperimentLifeCycle.can_transition(
            status_from=last_status, status_to=ExperimentLifeCycle.BUILDING):
        _logger.info('Experiment id `%s` cannot transition from `%s` to `%s`.',
                     experiment_id, last_status, ExperimentLifeCycle.BUILDING)
        return

    build_job, image_exists, build_status = dockerizer_scheduler.create_build_job(
        user=experiment.user,
        project=experiment.project,
        config=experiment.specification.build,
        configmap_refs=experiment.specification.configmap_refs,
        secret_refs=experiment.specification.secret_refs,
        code_reference=experiment.code_reference)

    experiment.build_job = build_job
    experiment.save(update_fields=['build_job'])
    if image_exists:
        # The image already exists, so we can start the experiment right away
        celery_app.send_task(SchedulerCeleryTasks.EXPERIMENTS_START,
                             kwargs={'experiment_id': experiment_id},
                             countdown=conf.get('GLOBAL_COUNTDOWN'))
        return

    if not build_status:
        experiment.set_status(ExperimentLifeCycle.FAILED,
                              message='Could not start build process.')
        return

    # Update experiment status to show that its building
    experiment.set_status(ExperimentLifeCycle.BUILDING)
Exemplo n.º 7
0
def experiments_start(experiment_id):
    experiment = get_valid_experiment(experiment_id=experiment_id)
    if not experiment:
        _logger.info('Something went wrong, '
                     'the Experiment `%s` does not exist anymore.', experiment_id)
        return

    if not ExperimentLifeCycle.can_transition(status_from=experiment.last_status,
                                              status_to=ExperimentLifeCycle.SCHEDULED):
        _logger.info('Experiment `%s` cannot transition from `%s` to `%s`.',
                     experiment.unique_name, experiment.last_status, ExperimentLifeCycle.SCHEDULED)
        return None

    # Check if we need to copy an experiment
    if experiment.is_copy:
        copy_experiment(experiment)
    else:
        create_experiment_outputs_path(experiment.unique_name)

    experiment_scheduler.start_experiment(experiment)
Exemplo n.º 8
0
def experiments_build(experiment_id):
    experiment = get_valid_experiment(experiment_id=experiment_id)
    if not experiment:
        return

    # No need to build the image, start the experiment directly
    if not (experiment.specification.build and experiment.specification.run):
        celery_app.send_task(
            SchedulerCeleryTasks.EXPERIMENTS_START,
            kwargs={'experiment_id': experiment_id})
        return

    if not ExperimentLifeCycle.can_transition(status_from=experiment.last_status,
                                              status_to=ExperimentLifeCycle.BUILDING):
        _logger.info('Experiment id `%s` cannot transition from `%s` to `%s`.',
                     experiment_id, experiment.last_status, ExperimentLifeCycle.BUILDING)
        return

    build_job, image_exists, build_status = dockerizer_scheduler.create_build_job(
        user=experiment.user,
        project=experiment.project,
        config=experiment.specification.build,
        code_reference=experiment.code_reference)

    experiment.build_job = build_job
    experiment.save()
    if image_exists:
        # The image already exists, so we can start the experiment right away
        celery_app.send_task(
            SchedulerCeleryTasks.EXPERIMENTS_START,
            kwargs={'experiment_id': experiment_id})
        return

    if not build_status:
        experiment.set_status(ExperimentLifeCycle.FAILED, message='Could not start build process.')
        return

    # Update experiment status to show that its building
    experiment.set_status(ExperimentLifeCycle.BUILDING)