def tensorboard_job_status_post_save(sender, **kwargs):
    """Sync a tensorboard job with its newly saved status record.

    Receiver-style handler: ``kwargs['instance']`` is the status record and
    ``instance.job`` is the tensorboard job it belongs to. The handler points
    the job at the new status record, lets ``set_job_started_at`` /
    ``set_job_finished_at`` react to the new status, saves the listed
    columns, records audit events, and reports the status through
    ``new_operation_run_status``.

    Args:
        sender: Signal sender; not used.
        **kwargs: Must contain ``instance``, the status record.
    """
    instance = kwargs['instance']
    job = instance.job
    # Read the previous status before the job is re-pointed below.
    previous_status = job.last_status

    # Update job last_status
    job.status = instance
    set_job_started_at(instance=job, status=instance.status)
    set_job_finished_at(instance=job, status=instance.status)
    job.save(update_fields=['status', 'started_at', 'updated_at', 'finished_at'])

    auditor.record(event_type=TENSORBOARD_NEW_STATUS,
                   instance=job,
                   previous_status=previous_status,
                   target='project')
    if instance.status == JobLifeCycle.STOPPED:
        auditor.record(event_type=TENSORBOARD_STOPPED,
                       instance=job,
                       previous_status=previous_status,
                       target='project')
    elif instance.status == JobLifeCycle.FAILED:
        auditor.record(event_type=TENSORBOARD_FAILED,
                       instance=job,
                       previous_status=previous_status,
                       target='project')
    # This branch used to re-test STOPPED, which made it unreachable.
    elif instance.status == JobLifeCycle.SUCCEEDED:
        auditor.record(event_type=TENSORBOARD_SUCCEEDED,
                       instance=job,
                       previous_status=previous_status,
                       target='project')

    # Drop the Redis status entry once the job reached a done state.
    if JobLifeCycle.is_done(instance.status):
        RedisStatuses.delete_status(job.uuid.hex)

    new_operation_run_status(entity_type=content_types.TENSORBOARD_JOB,
                             entity=job,
                             status=instance.status)
def build_job_status_post_save(sender, **kwargs):
    """React to a saved build-job status record.

    Attaches the status record (``kwargs['instance']``) to its build job,
    calls the started/finished helpers, saves the ``status``, ``started_at``
    and ``finished_at`` columns, and records the matching audit events.

    Args:
        sender: Signal sender; not used.
        **kwargs: Must contain ``instance``, the status record.
    """
    status_record = kwargs['instance']
    build_job = status_record.job
    # Must be read before the job is re-pointed at the new record.
    old_status = build_job.last_status
    new_status = status_record.status

    build_job.status = status_record
    set_job_started_at(instance=build_job, status=new_status)
    set_job_finished_at(instance=build_job, status=new_status)
    build_job.save(update_fields=['status', 'started_at', 'finished_at'])

    auditor.record(event_type=BUILD_JOB_NEW_STATUS,
                   instance=build_job,
                   previous_status=old_status)

    if new_status == JobLifeCycle.CREATED:
        # Creation is recorded without a previous status.
        auditor.record(event_type=BUILD_JOB_CREATED, instance=build_job)
    else:
        # First matching lifecycle state wins, like an elif chain.
        status_events = (
            (JobLifeCycle.STOPPED, BUILD_JOB_STOPPED),
            (JobLifeCycle.FAILED, BUILD_JOB_FAILED),
            (JobLifeCycle.SUCCEEDED, BUILD_JOB_SUCCEEDED),
        )
        for lifecycle_state, event in status_events:
            if new_status == lifecycle_state:
                auditor.record(event_type=event,
                               instance=build_job,
                               previous_status=old_status)
                break

    # handle done status
    if JobLifeCycle.is_done(new_status):
        auditor.record(event_type=BUILD_JOB_DONE,
                       instance=build_job,
                       previous_status=old_status)
def job_status_post_save(sender, **kwargs):
    """React to a saved job status record.

    Attaches the status record (``kwargs['instance']``) to its job, calls the
    started/finished helpers, saves the listed columns, records audit
    events, removes the Redis status entry when the job is done, and reports
    the status through ``new_operation_run_status``.

    Args:
        sender: Signal sender; not used.
        **kwargs: Must contain ``instance``, the status record.
    """
    status_record = kwargs['instance']
    target_job = status_record.job
    # Must be read before the job is re-pointed at the new record.
    old_status = target_job.last_status
    new_status = status_record.status

    target_job.status = status_record
    set_job_started_at(instance=target_job, status=new_status)
    set_job_finished_at(instance=target_job, status=new_status)
    target_job.save(
        update_fields=['status', 'started_at', 'updated_at', 'finished_at'])

    auditor.record(event_type=JOB_NEW_STATUS,
                   instance=target_job,
                   previous_status=old_status)

    if new_status == JobLifeCycle.CREATED:
        # Creation is recorded without a previous status.
        auditor.record(event_type=JOB_CREATED, instance=target_job)
    else:
        # First matching lifecycle state wins, like an elif chain.
        status_events = (
            (JobLifeCycle.STOPPED, JOB_STOPPED),
            (JobLifeCycle.FAILED, JOB_FAILED),
            (JobLifeCycle.SUCCEEDED, JOB_SUCCEEDED),
        )
        for lifecycle_state, event in status_events:
            if new_status == lifecycle_state:
                auditor.record(event_type=event,
                               instance=target_job,
                               previous_status=old_status)
                break

    if JobLifeCycle.is_done(new_status):
        auditor.record(event_type=JOB_DONE,
                       instance=target_job,
                       previous_status=old_status)
        RedisStatuses.delete_status(target_job.uuid.hex)

    new_operation_run_status(entity_type=content_types.JOB,
                             entity=target_job,
                             status=new_status)
def tensorboard_job_status_post_save(sender, **kwargs):
    """Sync a tensorboard job with its newly saved status record.

    Receiver-style handler: ``kwargs['instance']`` is the status record and
    ``instance.job`` is the tensorboard job it belongs to. The handler points
    the job at the new status record, lets ``set_job_started_at`` /
    ``set_job_finished_at`` react to the new status, saves the ``status``,
    ``started_at`` and ``finished_at`` columns, and records audit events.

    Args:
        sender: Signal sender; not used.
        **kwargs: Must contain ``instance``, the status record.
    """
    instance = kwargs['instance']
    job = instance.job
    # Read the previous status before the job is re-pointed below.
    previous_status = job.last_status

    # Update job last_status
    job.status = instance
    set_job_started_at(instance=job, status=instance.status)
    set_job_finished_at(instance=job, status=instance.status)
    job.save(update_fields=['status', 'started_at', 'finished_at'])

    auditor.record(event_type=TENSORBOARD_NEW_STATUS,
                   instance=job,
                   previous_status=previous_status,
                   target='project')
    if instance.status == JobLifeCycle.STOPPED:
        auditor.record(event_type=TENSORBOARD_STOPPED,
                       instance=job,
                       previous_status=previous_status,
                       target='project')
    elif instance.status == JobLifeCycle.FAILED:
        auditor.record(event_type=TENSORBOARD_FAILED,
                       instance=job,
                       previous_status=previous_status,
                       target='project')
    # This branch used to re-test STOPPED, which made it unreachable.
    elif instance.status == JobLifeCycle.SUCCEEDED:
        auditor.record(event_type=TENSORBOARD_SUCCEEDED,
                       instance=job,
                       previous_status=previous_status,
                       target='project')
def experiment_job_status_post_save(sender, **kwargs):
    """React to a saved experiment-job status record.

    Attaches the status record (``kwargs['instance']``) to its job, calls the
    started/finished helpers, and saves the job. When the job is done it is
    removed from ``RedisJobContainers``. Unless the owning experiment is
    already done, an ``EXPERIMENTS_CHECK_STATUS`` task is sent for it.

    Args:
        sender: Signal sender; not used.
        **kwargs: Must contain ``instance``, the status record.
    """
    status_record = kwargs['instance']
    experiment_job = status_record.job

    # Point the job at its latest status record.
    experiment_job.status = status_record
    set_job_started_at(instance=experiment_job, status=status_record.status)
    set_job_finished_at(instance=experiment_job, status=status_record.status)
    experiment_job.save()

    # A done job no longer needs its containers monitored.
    if experiment_job.is_done:
        from libs.redis_db import RedisJobContainers
        RedisJobContainers.remove_job(experiment_job.uuid.hex)

    # Only ask for a status check while the experiment is not done.
    experiment = status_record.job.experiment
    if not experiment.is_done:
        task_kwargs = {'experiment_id': experiment.id}
        celery_app.send_task(
            SchedulerCeleryTasks.EXPERIMENTS_CHECK_STATUS,
            kwargs=task_kwargs,
            countdown=1)
def notebook_job_status_post_save(sender, **kwargs):
    """Sync a notebook job with its newly saved status record.

    Receiver-style handler: ``kwargs['instance']`` is the status record and
    ``instance.job`` is the notebook job it belongs to. The handler points
    the job at the new status record, lets ``set_job_started_at`` /
    ``set_job_finished_at`` react to the new status, saves the listed
    columns, records audit events, and reports the status through
    ``new_operation_run_status``.

    Args:
        sender: Signal sender; not used.
        **kwargs: Must contain ``instance``, the status record.
    """
    instance = kwargs['instance']
    job = instance.job
    # Read the previous status before the job is re-pointed below.
    previous_status = job.last_status

    # Update job last_status
    job.status = instance
    set_job_started_at(instance=job, status=instance.status)
    set_job_finished_at(instance=job, status=instance.status)
    job.save(
        update_fields=['status', 'started_at', 'updated_at', 'finished_at'])

    auditor.record(event_type=NOTEBOOK_NEW_STATUS,
                   instance=job,
                   previous_status=previous_status,
                   target='project')
    if instance.status == JobLifeCycle.STOPPED:
        auditor.record(event_type=NOTEBOOK_STOPPED,
                       instance=job,
                       previous_status=previous_status,
                       target='project')
    elif instance.status == JobLifeCycle.FAILED:
        auditor.record(event_type=NOTEBOOK_FAILED,
                       instance=job,
                       previous_status=previous_status,
                       target='project')
    # This branch used to re-test STOPPED, which made it unreachable.
    elif instance.status == JobLifeCycle.SUCCEEDED:
        auditor.record(event_type=NOTEBOOK_SUCCEEDED,
                       instance=job,
                       previous_status=previous_status,
                       target='project')

    new_operation_run_status(entity_type=content_types.NOTEBOOK_JOB,
                             entity=job,
                             status=instance.status)
def notebook_job_status_post_save(sender, **kwargs):
    """Sync a notebook job with its newly saved status record.

    Receiver-style handler: ``kwargs['instance']`` is the status record and
    ``instance.job`` is the notebook job it belongs to. The handler points
    the job at the new status record, lets ``set_job_started_at`` /
    ``set_job_finished_at`` react to the new status, saves the job, and
    records audit events.

    Args:
        sender: Signal sender; not used.
        **kwargs: Must contain ``instance``, the status record.
    """
    instance = kwargs['instance']
    job = instance.job
    # Read the previous status before the job is re-pointed below.
    previous_status = job.last_status

    # Update job last_status
    job.status = instance
    set_job_started_at(instance=job, status=instance.status)
    set_job_finished_at(instance=job, status=instance.status)
    job.save()

    auditor.record(event_type=NOTEBOOK_NEW_STATUS,
                   instance=job,
                   previous_status=previous_status,
                   target='project')
    if instance.status == JobLifeCycle.STOPPED:
        auditor.record(event_type=NOTEBOOK_STOPPED,
                       instance=job,
                       previous_status=previous_status,
                       target='project')
    elif instance.status == JobLifeCycle.FAILED:
        auditor.record(event_type=NOTEBOOK_FAILED,
                       instance=job,
                       previous_status=previous_status,
                       target='project')
    # This check used to test STOPPED a second time, so a stopped notebook
    # was also recorded as succeeded and a succeeded one was never recorded.
    elif instance.status == JobLifeCycle.SUCCEEDED:
        auditor.record(event_type=NOTEBOOK_SUCCEEDED,
                       instance=job,
                       previous_status=previous_status,
                       target='project')
def build_job_status_post_save(sender, **kwargs):
    """React to a saved build-job status record.

    Attaches the status record (``kwargs['instance']``) to its build job,
    calls the started/finished helpers, saves the ``status``, ``started_at``
    and ``finished_at`` columns, and records audit events. For failed or
    succeeded builds it sends a ``BUILD_JOBS_STOP`` task, and for any done
    status it sends a ``BUILD_JOBS_NOTIFY_DONE`` task.

    Args:
        sender: Signal sender; not used.
        **kwargs: Must contain ``instance``, the status record.
    """
    status_record = kwargs['instance']
    build_job = status_record.job
    # Must be read before the job is re-pointed at the new record.
    old_status = build_job.last_status
    new_status = status_record.status

    build_job.status = status_record
    set_job_started_at(instance=build_job, status=new_status)
    set_job_finished_at(instance=build_job, status=new_status)
    build_job.save(update_fields=['status', 'started_at', 'finished_at'])

    auditor.record(event_type=BUILD_JOB_NEW_STATUS,
                   instance=build_job,
                   previous_status=old_status)

    # Each state is tested independently, like the original plain ifs.
    status_events = (
        (JobLifeCycle.STOPPED, BUILD_JOB_STOPPED),
        (JobLifeCycle.FAILED, BUILD_JOB_FAILED),
        (JobLifeCycle.SUCCEEDED, BUILD_JOB_SUCCEEDED),
    )
    for lifecycle_state, event in status_events:
        if new_status == lifecycle_state:
            auditor.record(event_type=event,
                           instance=build_job,
                           previous_status=old_status)

    # Check if we need to schedule a job stop
    stop_statuses = (JobLifeCycle.FAILED, JobLifeCycle.SUCCEEDED)
    if new_status in stop_statuses:
        _logger.info(
            'The build job `%s` failed or is done, '
            'send signal to stop.', build_job.unique_name)
        stop_kwargs = {
            'project_name': build_job.project.unique_name,
            'project_uuid': build_job.project.uuid.hex,
            'build_job_name': build_job.unique_name,
            'build_job_uuid': build_job.uuid.hex,
            'update_status': False,
            'collect_logs': True,
        }
        # The countdown comes from RedisTTL for this build.
        celery_app.send_task(
            SchedulerCeleryTasks.BUILD_JOBS_STOP,
            kwargs=stop_kwargs,
            countdown=RedisTTL.get_for_build(build_id=build_job.id))

    # handle done status
    if JobLifeCycle.is_done(new_status):
        auditor.record(event_type=BUILD_JOB_DONE,
                       instance=build_job,
                       previous_status=old_status)
        done_kwargs = {'build_job_id': build_job.id}
        celery_app.send_task(SchedulerCeleryTasks.BUILD_JOBS_NOTIFY_DONE,
                             kwargs=done_kwargs)
def job_status_post_save(sender, **kwargs):
    """Sync a job with its newly saved status record and schedule its stop.

    Receiver-style handler: ``kwargs['instance']`` is the status record and
    ``instance.job`` is the job it belongs to. The handler points the job at
    the new status record, lets ``set_job_started_at`` /
    ``set_job_finished_at`` react to the new status, saves the job, and
    records audit events. If the job has a specification and has failed or
    succeeded, it sends a ``JOBS_STOP`` task.

    Args:
        sender: Signal sender; not used.
        **kwargs: Must contain ``instance``, the status record.
    """
    instance = kwargs['instance']
    job = instance.job
    # Read the previous status before the job is re-pointed below.
    previous_status = job.last_status

    # Update job last_status
    job.status = instance
    set_job_started_at(instance=job, status=instance.status)
    set_job_finished_at(instance=job, status=instance.status)
    # Save the timestamp columns together with the status. With only
    # 'status' listed, anything the two helpers set on the job would be
    # dropped. NOTE(review): assumes the helpers set `started_at` /
    # `finished_at` in memory without saving -- confirm.
    job.save(update_fields=['status', 'started_at', 'finished_at'])

    auditor.record(event_type=JOB_NEW_STATUS,
                   instance=job,
                   previous_status=previous_status)
    if instance.status == JobLifeCycle.STOPPED:
        auditor.record(event_type=JOB_STOPPED,
                       instance=job,
                       previous_status=previous_status)

    if instance.status == JobLifeCycle.FAILED:
        auditor.record(event_type=JOB_FAILED,
                       instance=job,
                       previous_status=previous_status)

    if instance.status == JobLifeCycle.SUCCEEDED:
        auditor.record(event_type=JOB_SUCCEEDED,
                       instance=job,
                       previous_status=previous_status)

    if JobLifeCycle.is_done(instance.status):
        auditor.record(event_type=JOB_DONE,
                       instance=job,
                       previous_status=previous_status)

    # Check if we need to schedule a job stop
    if not job.specification:
        return

    if instance.status in (JobLifeCycle.FAILED, JobLifeCycle.SUCCEEDED):
        _logger.debug('The build job `%s` failed or is done, '
                      'send signal to stop.', job.unique_name)
        # Schedule stop for this job; the countdown comes from RedisTTL.
        celery_app.send_task(
            SchedulerCeleryTasks.JOBS_STOP,
            kwargs={
                'project_name': job.project.unique_name,
                'project_uuid': job.project.uuid.hex,
                'job_name': job.unique_name,
                'job_uuid': job.uuid.hex,
                'specification': job.config,
                'update_status': False
            },
            countdown=RedisTTL.get_for_job(job_id=job.id))
def experiment_job_status_post_save(sender, **kwargs):
    """React to a saved experiment-job status record.

    Attaches the status record (``kwargs['instance']``) to its job, calls the
    started/finished helpers, and saves the ``status``, ``started_at`` and
    ``finished_at`` columns. When the job is done it is removed from
    ``RedisJobContainers``. Finally an ``EXPERIMENT_JOB_NEW_STATUS`` audit
    event is recorded.

    Args:
        sender: Signal sender; not used.
        **kwargs: Must contain ``instance``, the status record.
    """
    status_record = kwargs['instance']
    experiment_job = status_record.job
    new_status = status_record.status

    experiment_job.status = status_record
    set_job_started_at(instance=experiment_job, status=new_status)
    set_job_finished_at(instance=experiment_job, status=new_status)
    experiment_job.save(update_fields=['status', 'started_at', 'finished_at'])

    # A done job no longer needs its containers monitored.
    if experiment_job.is_done:
        from db.redis.containers import RedisJobContainers
        RedisJobContainers.remove_job(experiment_job.uuid.hex)

    auditor.record(event_type=EXPERIMENT_JOB_NEW_STATUS,
                   instance=experiment_job)
def tensorboard_job_status_post_save(sender, **kwargs):
    """Sync a tensorboard job with its newly saved status record.

    Receiver-style handler: ``kwargs['instance']`` is the status record and
    ``instance.job`` is the tensorboard job it belongs to. The handler points
    the job at the new status record, lets ``set_job_started_at`` /
    ``set_job_finished_at`` react to the new status, saves the ``status``,
    ``started_at`` and ``finished_at`` columns, and records audit events. On
    failure it also sends a ``TENSORBOARDS_STOP`` task.

    Args:
        sender: Signal sender; not used.
        **kwargs: Must contain ``instance``, the status record.
    """
    instance = kwargs['instance']
    job = instance.job
    # Read the previous status before the job is re-pointed below.
    previous_status = job.last_status

    # Update job last_status
    job.status = instance
    set_job_started_at(instance=job, status=instance.status)
    set_job_finished_at(instance=job, status=instance.status)
    job.save(update_fields=['status', 'started_at', 'finished_at'])

    auditor.record(event_type=TENSORBOARD_NEW_STATUS,
                   instance=job,
                   previous_status=previous_status,
                   target='project')
    if instance.status == JobLifeCycle.STOPPED:
        auditor.record(event_type=TENSORBOARD_STOPPED,
                       instance=job,
                       previous_status=previous_status,
                       target='project')
    elif instance.status == JobLifeCycle.FAILED:
        auditor.record(event_type=TENSORBOARD_FAILED,
                       instance=job,
                       previous_status=previous_status,
                       target='project')
        # Schedule stop for this tensorboard
        celery_app.send_task(
            SchedulerCeleryTasks.TENSORBOARDS_STOP,
            kwargs={
                'project_name': job.project.unique_name,
                'project_uuid': job.project.uuid.hex,
                'tensorboard_job_name': job.unique_name,
                'tensorboard_job_uuid': job.uuid.hex,
                'update_status': False
            })
    # This check used to test STOPPED a second time, so a stopped
    # tensorboard was also recorded as succeeded and a succeeded one was
    # never recorded.
    elif instance.status == JobLifeCycle.SUCCEEDED:
        auditor.record(event_type=TENSORBOARD_SUCCEEDED,
                       instance=job,
                       previous_status=previous_status,
                       target='project')