def new_experiment_status(sender, **kwargs):
    """React to the experiment status record passed as ``kwargs['instance']``.

    * If ``kwargs['created']`` is truthy, the record becomes the experiment's
      ``experiment_status`` and the experiment is saved.
    * If the status is ``SUCCEEDED``, every job that is not done yet is set
      to ``JobLifeCycle.SUCCEEDED``.
    * If the status is ``FAILED`` or ``SUCCEEDED``, the scheduler is asked to
      stop the experiment without updating its status.

    NOTE(review): the ``(sender, **kwargs)`` signature looks like a Django
    signal receiver; the registration is not visible here -- confirm.
    """
    status_record = kwargs['instance']
    is_new = kwargs.get('created', False)
    xp = status_record.experiment

    if is_new:
        # Keep the experiment pointing at its latest status record.
        xp.experiment_status = status_record
        xp.save()

    if status_record.status == ExperimentLifeCycle.SUCCEEDED:
        # A stop is about to be triggered, so flag the unfinished jobs as
        # succeeded first. The generator keeps the check lazy, job by job.
        unfinished_jobs = (job for job in xp.jobs.all() if not job.is_done)
        for job in unfinished_jobs:
            job.set_status(JobLifeCycle.SUCCEEDED, message='Master is done.')

    stop_statuses = (ExperimentLifeCycle.FAILED, ExperimentLifeCycle.SUCCEEDED)
    if status_record.status not in stop_statuses:
        return

    message = (
        'One of the workers failed or Master for experiment `{}` is done, '
        'send signal to other workers to stop.'
    )
    logger.info(message.format(xp.unique_name))
    # Other jobs of this experiment may still be running: schedule a stop.
    scheduler.stop_experiment(xp, update_status=False)
def experiment_group_deleted(sender, **kwargs):
    """Stop the running experiments of the group in ``kwargs['instance']``.

    Each entry of the group's ``running_experiments`` is handed to the
    scheduler with ``update_status=False``; afterwards the group's
    ``unique_name`` is passed to ``delete_experiment_group_outputs``.
    """
    group = kwargs['instance']

    for running_experiment in group.running_experiments:
        scheduler.stop_experiment(running_experiment, update_status=False)

    # Outputs are removed once every stop request has been issued.
    delete_experiment_group_outputs(group.unique_name)
def experiment_deleted(sender, **kwargs):
    """Stop the experiment in ``kwargs['instance']`` and clean up after it.

    The scheduler stop is skipped when ``ExperimentGroup.DoesNotExist`` is
    raised inside the ``try`` block; the experiment's outputs and logs are
    deleted in either case.
    """
    experiment = kwargs['instance']

    try:
        # The relation is read only so that a missing group raises here,
        # before the scheduler is contacted.
        _ = experiment.experiment_group
        scheduler.stop_experiment(experiment, update_status=False)
    except ExperimentGroup.DoesNotExist:
        # Group already deleted: the experiment was stopped together with it.
        pass

    unique_name = experiment.unique_name
    delete_experiment_outputs(unique_name)
    delete_experiment_logs(unique_name)
def experiment_group_deleted(sender, **kwargs):
    """Stop the running experiments of the group in ``kwargs['instance']``.

    For each entry of the group's ``running_experiments`` the job rows are
    deleted and the scheduler is then asked to stop the experiment with
    ``update_status=False``. Finally the group's ``unique_name`` is passed to
    the group-level output and log deletion helpers.
    """
    group = kwargs['instance']

    for running_experiment in group.running_experiments:
        # Remove the job rows *before* signalling k8s, so that no job status
        # can be updated while the stop is in flight.
        running_experiment.jobs.all().delete()
        scheduler.stop_experiment(running_experiment, update_status=False)

    # Outputs first, then logs.
    unique_name = group.unique_name
    delete_experiment_group_outputs(unique_name)
    delete_experiment_group_logs(unique_name)
def experiment_deleted(sender, **kwargs):
    """Stop the experiment in ``kwargs['instance']`` and clean up after it.

    Inside the ``try`` block the experiment's job rows are deleted and the
    scheduler is asked to stop it with ``update_status=False``; both steps
    are skipped when ``ExperimentGroup.DoesNotExist`` is raised. The
    experiment's outputs and logs are deleted in either case.
    """
    experiment = kwargs['instance']

    try:
        # The relation is read only so that a missing group raises here,
        # before anything is deleted or the scheduler is contacted.
        _ = experiment.experiment_group
        # Remove the job rows *before* signalling k8s, so that no job status
        # can be updated while the stop is in flight.
        experiment.jobs.all().delete()
        scheduler.stop_experiment(experiment, update_status=False)
    except ExperimentGroup.DoesNotExist:
        # Group already deleted: the experiment was stopped together with it.
        pass

    # Outputs first, then logs.
    unique_name = experiment.unique_name
    delete_experiment_outputs(unique_name)
    delete_experiment_logs(unique_name)
def handle(self, *args, **options):
    """Ask the scheduler to stop every experiment matched by the status filter.

    Experiments are selected with the ``experiment_status__status__in``
    lookup against ``ExperimentLifeCycle.RUNNING_STATUS``. ``args`` and
    ``options`` are accepted but not used.
    """
    running_experiments = Experiment.objects.filter(
        experiment_status__status__in=ExperimentLifeCycle.RUNNING_STATUS,
    )
    for running_experiment in running_experiments:
        scheduler.stop_experiment(running_experiment)
def stop_experiment(experiment_id):
    """Stop the experiment identified by ``experiment_id``.

    The experiment is resolved through ``get_valid_experiment``; when that
    returns a falsy value nothing happens. Otherwise the scheduler is asked
    to stop it with ``update_status=True``. Always returns ``None``.
    """
    found = get_valid_experiment(experiment_id=experiment_id)
    if found:
        scheduler.stop_experiment(found, update_status=True)