示例#1
0
def new_experiment_status(sender, **kwargs):
    """React to an experiment status record being saved.

    Reads the status record from ``kwargs['instance']`` and the optional
    ``kwargs['created']`` flag (presumably a Django ``post_save`` receiver --
    confirm at the registration site).

    * When the record was just created, it is stored on the experiment as
      ``experiment_status`` and the experiment is saved.
    * When the status is SUCCEEDED, every job of the experiment that is not
      done yet is marked SUCCEEDED.
    * When the status is FAILED or SUCCEEDED, the scheduler is asked to stop
      the experiment without updating its status.
    """
    status_record = kwargs['instance']
    experiment = status_record.experiment

    if kwargs.get('created', False):
        # Keep the experiment pointing at its most recent status record.
        experiment.experiment_status = status_record
        experiment.save()

    if status_record.status == ExperimentLifeCycle.SUCCEEDED:
        # A stop is about to be triggered, so flag the remaining jobs as
        # succeeded. The generator is lazy: each job is checked right before
        # it is updated.
        unfinished_jobs = (job for job in experiment.jobs.all() if not job.is_done)
        for job in unfinished_jobs:
            job.set_status(JobLifeCycle.SUCCEEDED, message='Master is done.')

    stop_triggers = (ExperimentLifeCycle.FAILED, ExperimentLifeCycle.SUCCEEDED)
    if status_record.status not in stop_triggers:
        return

    stop_notice = (
        'One of the workers failed or Master for experiment `{}` is done, '
        'send signal to other workers to stop.')
    logger.info(stop_notice.format(experiment.unique_name))
    # Other jobs may still be running, so schedule a stop for the experiment.
    scheduler.stop_experiment(experiment, update_status=False)
示例#2
0
def experiment_group_deleted(sender, **kwargs):
    """Stop the group's running experiments, then remove the group's outputs.

    The group is read from ``kwargs['instance']``. Each experiment in its
    ``running_experiments`` is handed to the scheduler with
    ``update_status=False``; afterwards the outputs stored under the group's
    ``unique_name`` are deleted.
    """
    group = kwargs['instance']

    for running_experiment in group.running_experiments:
        scheduler.stop_experiment(running_experiment, update_status=False)

    # Outputs are removed only after every stop request has been issued.
    delete_experiment_group_outputs(group.unique_name)
示例#3
0
def experiment_deleted(sender, **kwargs):
    """Stop a deleted experiment and remove its outputs and logs.

    The experiment is read from ``kwargs['instance']``. The stop request is
    skipped when ``ExperimentGroup.DoesNotExist`` is raised inside the
    ``try`` body; the outputs and logs stored under the experiment's
    ``unique_name`` are deleted in either case.
    """
    experiment = kwargs['instance']

    try:
        # Touch the relation first so a missing group surfaces here, before
        # the scheduler is involved.
        _ = experiment.experiment_group
        scheduler.stop_experiment(experiment, update_status=False)
    except ExperimentGroup.DoesNotExist:
        # The group is already gone, in which case the experiment was
        # stopped when the group was deleted.
        pass

    for remove_artifacts in (delete_experiment_outputs, delete_experiment_logs):
        remove_artifacts(experiment.unique_name)
示例#4
0
def experiment_group_deleted(sender, **kwargs):
    """Stop the group's running experiments, then remove its outputs and logs.

    The group is read from ``kwargs['instance']``. For each experiment in its
    ``running_experiments`` the job rows are deleted first and the scheduler
    is then asked to stop the experiment with ``update_status=False``.
    Finally the outputs and logs stored under the group's ``unique_name``
    are deleted.
    """
    group = kwargs['instance']

    for running_experiment in group.running_experiments:
        # Drop the job rows before signalling k8s so that no job status can
        # be updated while the stop is in flight.
        job_rows = running_experiment.jobs.all()
        job_rows.delete()
        scheduler.stop_experiment(running_experiment, update_status=False)

    # Remove stored artifacts: outputs first, then logs.
    group_name = group.unique_name
    delete_experiment_group_outputs(group_name)
    delete_experiment_group_logs(group.unique_name)
示例#5
0
def experiment_deleted(sender, **kwargs):
    """Stop a deleted experiment and remove its jobs, outputs and logs.

    The experiment is read from ``kwargs['instance']``. Its job rows are
    deleted and a stop is requested from the scheduler, unless
    ``ExperimentGroup.DoesNotExist`` is raised inside the ``try`` body. The
    outputs and logs stored under the experiment's ``unique_name`` are
    deleted in either case.
    """
    experiment = kwargs['instance']

    try:
        # Touch the relation first so a missing group surfaces here, before
        # any job is deleted or the scheduler is involved.
        _ = experiment.experiment_group
        # Drop the job rows before signalling k8s so that no job status can
        # be updated while the stop is in flight.
        job_rows = experiment.jobs.all()
        job_rows.delete()
        scheduler.stop_experiment(experiment, update_status=False)
    except ExperimentGroup.DoesNotExist:
        # The group is already gone, in which case the experiment was
        # stopped when the group was deleted.
        pass

    # Remove stored artifacts: outputs first, then logs.
    for remove_artifacts in (delete_experiment_outputs, delete_experiment_logs):
        remove_artifacts(experiment.unique_name)
示例#6
0
 def handle(self, *args, **options):
     """Ask the scheduler to stop every experiment in a running status.

     Selects the experiments whose ``experiment_status.status`` is one of
     ``ExperimentLifeCycle.RUNNING_STATUS`` and passes each of them to
     ``scheduler.stop_experiment``. ``args`` and ``options`` are not used.
     """
     active_statuses = ExperimentLifeCycle.RUNNING_STATUS
     running_experiments = Experiment.objects.filter(
         experiment_status__status__in=active_statuses)
     for running_experiment in running_experiments:
         scheduler.stop_experiment(running_experiment)
示例#7
0
def stop_experiment(experiment_id):
    """Stop the experiment identified by ``experiment_id``.

    The experiment is looked up with ``get_valid_experiment``. When the
    lookup yields a falsy result nothing happens; otherwise the scheduler is
    asked to stop the experiment and update its status. Returns ``None``.
    """
    target = get_valid_experiment(experiment_id=experiment_id)
    if target:
        scheduler.stop_experiment(target, update_status=True)