def setUp(self):
    logger.addHandler(YabiDBHandler())
    create_workflow_with_job_and_a_task(self)
    self.wfl_logger = create_workflow_logger(logger, self.workflow.pk)
    self.job_logger = create_job_logger(logger, self.job.pk)
    self.task_logger = create_task_logger(logger, self.task.pk)
    self.other_wfl_logger = create_workflow_logger(logger, self.workflow.pk + 1)
    self.other_job_logger = create_job_logger(logger, self.job.pk + 1)
    self.other_task_logger = create_task_logger(logger, self.task.pk + 1)
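# The create_*_logger helpers used above aren't defined in this section. A
# minimal sketch of what they could look like, assuming they are thin
# logging.LoggerAdapter wrappers that attach the model table name and primary
# key as `extra` context for YabiDBHandler to pick up. The helper name
# _create_model_logger and the record attributes are assumptions for
# illustration, not the actual yabi implementation; the 'job' table name does
# match what test_logging_with_job_logger below asserts.
import logging


def _create_model_logger(logger, table_name, table_id):
    # LoggerAdapter copies the extra dict onto every LogRecord it emits, so a
    # handler can read record.table_name / record.table_id.
    return logging.LoggerAdapter(logger, {'table_name': table_name, 'table_id': table_id})


def create_workflow_logger(logger, workflow_pk):
    return _create_model_logger(logger, 'workflow', workflow_pk)


def create_job_logger(logger, job_pk):
    return _create_model_logger(logger, 'job', job_pk)


def create_task_logger(logger, task_pk):
    return _create_model_logger(logger, 'task', task_pk)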
def create_db_tasks(job_id):
    if job_id is None:
        logger.info("create_db_tasks received no job_id. Skipping processing.")
        return None
    job_logger = create_job_logger(logger, job_id)
    request = get_current_celery_task().request
    try:
        job = EngineJob.objects.get(pk=job_id)
        if job.status == STATUS_READY:
            # Handles the case where, in a previous execution, the Celery
            # worker died after the tasks had been created and the transaction
            # had been committed, but before the Celery task returned
            assert job.total_tasks() > 0, "Job in READY state, but has no tasks"
            job_logger.warning("Job was already in READY state. Skipping creation of db tasks.")
            return job_id

        if job.is_workflow_aborting:
            abort_job(job)
            return None

        tasks_count = job.create_tasks()
        if not tasks_count:
            return None
        return job_id

    except DecryptedCredentialNotAvailable as dcna:
        job_logger.exception("Decrypted credential not available.")
        countdown = backoff(request.retries)
        job_logger.warning("create_db_tasks.retry {0} in {1} seconds".format(job_id, countdown))
        raise get_current_celery_task().retry(exc=dcna, countdown=countdown)
    except Exception:
        job_logger.exception("Exception in create_db_tasks for job {0}".format(job_id))
        mark_job_as_error(job_id)
        raise
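# backoff(), used for the retry countdown above, isn't defined in this
# section. A plausible sketch, assuming a capped exponential backoff keyed on
# the Celery retry count; the base and cap values are made up for
# illustration:
def backoff(retries, base=5, cap=300):
    # 5, 10, 20, 40, ... seconds, never more than `cap`.
    return min(base * (2 ** retries), cap)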
def create_tasks(self):
    job_logger = create_job_logger(logger, self.pk)
    logger.debug('----- creating tasks for Job %s -----' % self.pk)
    assert self.total_tasks() == 0, "Job already has tasks"

    updated = Job.objects.filter(pk=self.pk, status=const.STATUS_PENDING).update(
        status=const.JOB_STATUS_PROCESSING)
    if updated == 0:
        job_logger.info("Another process_jobs() must have picked up job %s already" % self.pk)
        return

    self.update_dependencies()
    input_files = self.get_input_files()
    self.create_one_task_for_each_input_file(input_files)

    # there must be at least one task for every job
    if not self.total_tasks():
        job_logger.critical('No tasks for job: %s' % self.pk)
        raise Exception('No tasks for job: %s' % self.pk)

    # mark job as ready so it can be requested by a backend
    self.status = const.STATUS_READY
    self.save()

    self.make_tasks_ready()
    return self.total_tasks()
def poll_until_dynbes_ready(job_id):
    job_logger = create_job_logger(logger, job_id)
    job = EngineJob.objects.get(pk=job_id)
    if job.is_workflow_aborting:
        abort_job(job)
        return None
    try:
        job_dynbes = job.dynamic_backends.distinct()
        instances_ready = map(provisioning.is_instance_ready, job_dynbes)
        if not all(instances_ready):
            raise get_current_celery_task().retry()

        for dynbe in job_dynbes:
            provisioning.update_dynbe_ip_addresses(job)

        return job_id
    except celery.exceptions.RetryTaskError:
        # raised by the retry above, just re-raise
        raise
    except Exception:
        job_logger.exception("Error in poll_until_dynbes_ready for job '%s'.", job_id)
        raise get_current_celery_task().retry()
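# get_current_celery_task() drives all of the retry calls in these functions
# but isn't defined in this section. A minimal sketch, assuming it simply
# exposes Celery's current-task proxy behind a seam that tests can stub out:
import celery


def get_current_celery_task():
    # celery.current_task is a proxy to the task the worker is executing.
    return celery.current_task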
def provision_ex_be(job_id):
    job_logger = create_job_logger(logger, job_id)
    try:
        provision_be(job_id, "ex")
        return job_id
    except Exception:
        job_logger.exception("Exception in provision_ex_be for job {0}".format(job_id))
        mark_job_as_error(job_id)
        raise
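# mark_job_as_error(), called from the error paths here and elsewhere, isn't
# shown in this section. A hedged sketch of the apparent contract: record the
# failure and flip the job's status to an error state. STATUS_ERROR and the
# single-update implementation are assumptions for illustration:
def mark_job_as_error(job_id):
    job_logger = create_job_logger(logger, job_id)
    job_logger.error("Marking job %s as errored", job_id)
    # Plain UPDATE so a half-broken job object can't block the status change.
    EngineJob.objects.filter(pk=job_id).update(status=STATUS_ERROR)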
def test_logging_with_job_logger(self):
    logger = logging.getLogger('yabi.backend.celerytasks')
    job_logger = create_job_logger(logger, JOB_ID)

    job_logger.debug(MSG)

    self.assertEqual(1, Syslog.objects.filter(message=MSG).count(),
                     "Message should have been syslogged")
    syslog = Syslog.objects.get(message=MSG)
    self.assertEqual('job', syslog.table_name)
    self.assertEqual(JOB_ID, syslog.table_id)
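# YabiDBHandler, attached in setUp above, isn't defined in this section. A
# minimal sketch consistent with this test: a logging.Handler that persists
# each record to the Syslog model, copying the table_name/table_id context
# attached by the create_*_logger adapters. The Syslog field names are taken
# from the assertions above; everything else is an assumption:
import logging


class YabiDBHandler(logging.Handler):
    def emit(self, record):
        try:
            Syslog.objects.create(
                message=record.getMessage(),
                table_name=getattr(record, 'table_name', None),
                table_id=getattr(record, 'table_id', None))
        except Exception:
            # Never let a logging failure break the code being logged.
            self.handleError(record)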
def clean_up_dynamic_backends(job_id):
    job_logger = create_job_logger(logger, job_id)
    try:
        job = EngineJob.objects.get(pk=job_id)
        dynamic_backends = job.dynamicbackendinstance_set.filter(destroyed_on__isnull=True)
        if dynamic_backends.count() == 0:
            logger.info("Job %s has no dynamic backends to be cleaned up.", job_id)
            return
        for dynamic_be in dynamic_backends:
            logger.info("Cleaning up dynamic backend %s", dynamic_be.hostname)
            provisioning.destroy_backend(dynamic_be)
        return job_id
    except Exception:
        job_logger.exception("Exception in clean_up_dynamic_backends for job {0}".format(job_id))
        raise get_current_celery_task().retry()
def spawn_ready_tasks(job_id):
    if job_id is None:
        logger.info("spawn_ready_tasks received no job_id. Skipping processing.")
        return None
    job_logger = create_job_logger(logger, job_id)
    job_logger.info("Starting spawn_ready_tasks for Job %s", job_id)
    try:
        job = EngineJob.objects.get(pk=job_id)
        ready_tasks = job.ready_tasks()
        logger.debug(ready_tasks)
        aborting = job.is_workflow_aborting
        if aborting:
            abort_job(job)
            for task in ready_tasks:
                task.set_status(STATUS_ABORTED)
                task.save()
        else:
            spawn_status = {}
            for task in ready_tasks:
                spawn_status[task.pk] = spawn_task(task)
            if not all(spawn_status.values()):
                not_spawned = [e[0] for e in spawn_status.items() if not e[1]]
                job_logger.info("Couldn't spawn tasks: %s", not_spawned)
                current_task = get_current_celery_task()
                current_task.retry(countdown=TASK_LIMIT_REACHED_RETRY_INTERVAL)

        # need to update task.job.status here when all tasks for job spawned?
        job_logger.info("Finished spawn_ready_tasks for Job %s", job_id)
        return job_id

    except celery.exceptions.RetryTaskError:
        # This is normal operation: Celery signals to the worker that this
        # task should be retried by raising a RetryTaskError. Just re-raise it.
        raise
    except Exception:
        job_logger.exception("Exception when spawning tasks for job {0}".format(job_id))
        mark_job_as_error(job_id)
        raise
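# abort_job() is called from several of these Celery tasks but isn't defined
# in this section. A hedged sketch of the apparent contract: mark the job
# itself as aborted so the workflow teardown can proceed. Note the caller
# above separately aborts the ready tasks, so this sketch only touches the
# job; the use of STATUS_ABORTED for jobs as well as tasks is an assumption:
def abort_job(job):
    job_logger = create_job_logger(logger, job.pk)
    job_logger.info("Aborting job %s", job.pk)
    job.status = STATUS_ABORTED
    job.save()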