def test_execute_job_group_failure_releases_lock(self):
    """After a failure, the worker should be able to accept another job.

    """
    jobgroup_a, calls_a = TestWorker.new_jobgroup(1)
    task_type_a = FakeTaskType([Exception()])
    cms.service.Worker.get_task_type = Mock(return_value=task_type_a)

    try:
        JobGroup.import_from_dict(
            self.service.execute_job_group(jobgroup_a.export_to_dict()))
    except JobException:
        # Expected.
        pass
    else:
        self.fail("Expected JobException from the task type.")

    cms.service.Worker.get_task_type.assert_has_calls(
        calls_a, any_order=True)
    self.assertEquals(task_type_a.call_count, 1)

    jobgroup_b, calls_b = TestWorker.new_jobgroup(3)
    task_type_b = FakeTaskType([True, True, True])
    cms.service.Worker.get_task_type = Mock(return_value=task_type_b)

    JobGroup.import_from_dict(
        self.service.execute_job_group(jobgroup_b.export_to_dict()))

    cms.service.Worker.get_task_type.assert_has_calls(
        calls_b, any_order=True)
    self.assertEquals(task_type_b.call_count, 3)
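# The tests in this file exercise the Worker against a FakeTaskType test
# double. Its real definition lives in the test helpers, so the sketch
# below is reconstructed from usage only; treat every detail as an
# assumption. Each call to execute_job consumes the next entry of the
# result list: True/False set job.success, an Exception instance is
# raised, and a number simulates a long-running job by sleeping (yielding
# the gevent hub so another greenlet can contend for the worker lock).
# The older tests also configure results via a set_results classmethod.
class FakeTaskType(object):
    fake_results = None

    def __init__(self, results=None):
        # Fall back to the class-level results set via set_results().
        self.results = results if results is not None \
            else FakeTaskType.fake_results
        self.index = 0
        self.call_count = 0

    @classmethod
    def set_results(cls, results):
        cls.fake_results = results

    def execute_job(self, job, file_cacher):
        self.call_count += 1
        result = self.results[self.index]
        self.index += 1
        if isinstance(result, bool):
            # True/False map directly onto the job outcome.
            job.success = result
        elif isinstance(result, Exception):
            # Simulate a task type failing mid-job.
            raise result
        else:
            # A number is a sleep interval, simulating a long job.
            gevent.sleep(result)
            job.success = True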
def test_execute_job_subsequent_success(self):
    """Executes three successful jobs, then four others.

    """
    n_jobs_a = 3
    jobs_a, calls_a = TestWorker.new_jobs(n_jobs_a, prefix="a")
    task_type_a = FakeTaskType([True] * n_jobs_a)
    cms.service.Worker.get_task_type = Mock(return_value=task_type_a)

    for job in jobs_a:
        job_group = JobGroup([job])
        JobGroup.import_from_dict(
            self.service.execute_job_group(job_group.export_to_dict()))

    cms.service.Worker.get_task_type.assert_has_calls(calls_a)
    self.assertEquals(task_type_a.call_count, n_jobs_a)

    n_jobs_b = 4
    jobs_b, calls_b = TestWorker.new_jobs(n_jobs_b, prefix="b")
    task_type_b = FakeTaskType([True] * n_jobs_b)
    cms.service.Worker.get_task_type = Mock(return_value=task_type_b)

    for job in jobs_b:
        job_group = JobGroup([job])
        JobGroup.import_from_dict(
            self.service.execute_job_group(job_group.export_to_dict()))

    cms.service.Worker.get_task_type.assert_has_calls(calls_b)
    self.assertEquals(task_type_b.call_count, n_jobs_b)
def test_execute_job_failure_releases_lock(self):
    """After a failure, the worker should be able to accept another job.

    """
    n_jobs_a = 1
    jobs_a, calls_a = TestWorker.new_jobs(n_jobs_a)
    task_type_a = FakeTaskType([Exception()])
    cms.service.Worker.get_task_type = Mock(return_value=task_type_a)

    with self.assertRaises(JobException):
        job_group = JobGroup([jobs_a[0]])
        JobGroup.import_from_dict(
            self.service.execute_job_group(job_group.export_to_dict()))

    cms.service.Worker.get_task_type.assert_has_calls(calls_a)
    self.assertEquals(task_type_a.call_count, n_jobs_a)

    n_jobs_b = 3
    jobs_b, calls_b = TestWorker.new_jobs(n_jobs_b)
    task_type_b = FakeTaskType([True] * n_jobs_b)
    cms.service.Worker.get_task_type = Mock(return_value=task_type_b)

    for job in jobs_b:
        job_group = JobGroup([job])
        JobGroup.import_from_dict(
            self.service.execute_job_group(job_group.export_to_dict()))

    cms.service.Worker.get_task_type.assert_has_calls(calls_b)
    self.assertEquals(task_type_b.call_count, n_jobs_b)
def test_execute_job_group_subsequent_locked(self):
    """Executes a job group with one long job, then another one
    that should fail because of the lock.

    """
    # Because of how gevent works, the interval here can be very small.
    task_type = FakeTaskType([0.01])
    cms.service.Worker.get_task_type = Mock(return_value=task_type)

    jobgroup_a, calls_a = TestWorker.new_jobgroup(1, prefix="a")
    jobgroup_b, calls_b = TestWorker.new_jobgroup(1, prefix="b")

    def first_call():
        JobGroup.import_from_dict(
            self.service.execute_job_group(jobgroup_a.export_to_dict()))

    first_greenlet = gevent.spawn(first_call)
    gevent.sleep(0)  # To ensure we call jobgroup_a first.

    try:
        JobGroup.import_from_dict(
            self.service.execute_job_group(jobgroup_b.export_to_dict()))
    except JobException:
        # Expected.
        pass
    else:
        self.fail("Expected JobException from the lock.")

    first_greenlet.get()
    cms.service.Worker.get_task_type.assert_has_calls(
        calls_a, any_order=True)
def test_execute_job_subsequent_locked(self):
    """Executes a long job, then another one that should fail
    because of the lock.

    """
    # Because of how gevent works, the interval here can be very small.
    task_type = FakeTaskType([0.01])
    cms.service.Worker.get_task_type = Mock(return_value=task_type)

    jobs_a, calls_a = TestWorker.new_jobs(1, prefix="a")
    jobs_b, calls_b = TestWorker.new_jobs(1, prefix="b")

    def first_call():
        job_group = JobGroup([jobs_a[0]])
        JobGroup.import_from_dict(
            self.service.execute_job_group(job_group.export_to_dict()))

    first_greenlet = gevent.spawn(first_call)
    gevent.sleep(0)  # To ensure the first job group grabs the lock first.

    with self.assertRaises(JobException):
        job_group = JobGroup([jobs_b[0]])
        JobGroup.import_from_dict(
            self.service.execute_job_group(job_group.export_to_dict()))

    first_greenlet.get()
    self.assertNotIn(calls_b[0],
                     cms.service.Worker.get_task_type.mock_calls)
    cms.service.Worker.get_task_type.assert_has_calls(calls_a)
def test_execute_job_group_subsequent_success(self):
    """Executes a job group with three successful jobs, then another one.

    """
    jobgroup_a, calls_a = TestWorker.new_jobgroup(3, prefix="a")
    task_type_a = FakeTaskType([True, True, True])
    cms.service.Worker.get_task_type = Mock(return_value=task_type_a)

    JobGroup.import_from_dict(
        self.service.execute_job_group(jobgroup_a.export_to_dict()))

    cms.service.Worker.get_task_type.assert_has_calls(
        calls_a, any_order=True)
    self.assertEquals(task_type_a.call_count, 3)

    jobgroup_b, calls_b = TestWorker.new_jobgroup(3, prefix="b")
    task_type_b = FakeTaskType([True, True, True])
    cms.service.Worker.get_task_type = Mock(return_value=task_type_b)

    JobGroup.import_from_dict(
        self.service.execute_job_group(jobgroup_b.export_to_dict()))

    cms.service.Worker.get_task_type.assert_has_calls(
        calls_b, any_order=True)
    self.assertEquals(task_type_b.call_count, 3)
def test_execute_job_group_success(self):
    """Executes a job group with three successful jobs.

    """
    jobgroup, calls = TestWorker.new_jobgroup(3)
    FakeTaskType.set_results([True] * 3)
    cms.service.Worker.get_task_type = Mock(return_value=FakeTaskType())

    JobGroup.import_from_dict(
        self.service.execute_job_group(jobgroup.export_to_dict()))

    cms.service.Worker.get_task_type.assert_has_calls(
        calls, any_order=True)
def test_execute_job_group_success(self):
    """Executes a job group with three successful jobs.

    """
    jobgroup, calls = TestWorker.new_jobgroup(3)
    task_type = FakeTaskType([True, True, True])
    cms.service.Worker.get_task_type = Mock(return_value=task_type)

    JobGroup.import_from_dict(
        self.service.execute_job_group(jobgroup.export_to_dict()))

    cms.service.Worker.get_task_type.assert_has_calls(
        calls, any_order=True)
    self.assertEquals(task_type.call_count, 3)
def test_execute_job_group_success(self):
    """Executes two successful job groups.

    """
    n_jobs = [3, 3]
    job_groups, calls = TestWorker.new_job_groups(n_jobs)
    task_type = FakeTaskType([True] * sum(n_jobs))
    cms.service.Worker.get_task_type = Mock(return_value=task_type)

    for job_group in job_groups:
        JobGroup.import_from_dict(
            self.service.execute_job_group(job_group.export_to_dict()))

    cms.service.Worker.get_task_type.assert_has_calls(calls)
    self.assertEquals(task_type.call_count, sum(n_jobs))
def test_execute_job_group_mixed_exceptions(self):
    """Executes a job group with some exceptions.

    """
    n_jobs = 4
    expected_success = [True, Exception(), False, True]
    self.assertEquals(n_jobs, len(expected_success))
    job_groups, unused_calls = TestWorker.new_job_groups([n_jobs])
    task_type = FakeTaskType(expected_success)
    cms.service.Worker.get_task_type = Mock(return_value=task_type)

    with self.assertRaises(JobException):
        JobGroup.import_from_dict(
            self.service.execute_job_group(job_groups[0].export_to_dict()))
def test_execute_job_group_mixed_success(self):
    """Executes three job groups with mixed grades of success.

    """
    n_jobs = [4, 4, 4]
    expected_success = (
        [True] * n_jobs[0] +
        [False] + [True] * (n_jobs[1] - 1) +
        [False] * n_jobs[2])
    self.assertEquals(sum(n_jobs), len(expected_success))
    job_groups, calls = TestWorker.new_job_groups(n_jobs)
    task_type = FakeTaskType(expected_success)
    cms.service.Worker.get_task_type = Mock(return_value=task_type)

    results = []
    for job_group in job_groups:
        results.append(JobGroup.import_from_dict(
            self.service.execute_job_group(job_group.export_to_dict())))

    expected_idx = 0
    for result in results:
        for job in result.jobs:
            self.assertIs(expected_success[expected_idx], job.success)
            expected_idx += 1

    cms.service.Worker.get_task_type.assert_has_calls(calls)
    self.assertEquals(task_type.call_count, sum(n_jobs))
def execute_job_group(self, job_group_dict):
    """Receive a group of jobs in dict format and execute them one
    by one.

    job_group_dict (dict): a dict from which a JobGroup can be
        imported.

    """
    job_group = JobGroup.import_from_dict(job_group_dict)

    if self.work_lock.acquire(False):
        try:
            self._ignore_job = False

            for k, job in job_group.jobs.iteritems():
                logger.info("Starting job.",
                            extra={"operation": job.info})

                job.shard = self.shard

                # FIXME This is actually kind of a workaround...
                # The only TaskType that needs it is OutputOnly.
                job._key = k

                # FIXME We're creating a new TaskType for each Job
                # even if, at the moment, a JobGroup always uses
                # the same TaskType and the same parameters. Yet,
                # this could change in the future, so the best
                # solution is to keep a cache of TaskTypes objects
                # (like ScoringService does with ScoreTypes, except
                # that we cannot index by Dataset ID here...).
                task_type = get_task_type(job.task_type,
                                          job.task_type_parameters)
                task_type.execute_job(job, self.file_cacher)

                logger.info("Finished job.",
                            extra={"operation": job.info})

                if not job.success or self._ignore_job:
                    job_group.success = False
                    break
            else:
                job_group.success = True

            return job_group.export_to_dict()
        except:
            err_msg = "Worker failed."
            logger.error(err_msg, exc_info=True)
            raise JobException(err_msg)
        finally:
            self.work_lock.release()
    else:
        err_msg = "Request received, but declined because of acquired " \
                  "lock (Worker is busy executing another job group; " \
                  "this should not happen: check if there is more than " \
                  "one ES running, or for bugs in ES)."
        logger.warning(err_msg)
        raise JobException(err_msg)
def test_execute_job_tasktype_raise(self):
    """Executes two jobs raising exceptions.

    """
    n_jobs = 2
    jobs, unused_calls = TestWorker.new_jobs(n_jobs)
    task_type = FakeTaskType([Exception(), Exception()])
    cms.service.Worker.get_task_type = Mock(return_value=task_type)

    for job in jobs:
        with self.assertRaises(JobException):
            job_group = JobGroup([job])
            JobGroup.import_from_dict(
                self.service.execute_job_group(job_group.export_to_dict()))

    self.assertEquals(cms.service.Worker.get_task_type.call_count, n_jobs)
    self.assertEquals(task_type.call_count, n_jobs)
def execute_job_group(self, job_group_dict):
    """Receive a group of jobs in dict format and execute them one
    by one.

    job_group_dict ({}): a JobGroup exported to dict.

    return ({}): the same JobGroup in dict format, but containing
        the results.

    """
    start_time = time.time()
    job_group = JobGroup.import_from_dict(job_group_dict)

    if self.work_lock.acquire(False):
        try:
            logger.info("Starting job group.")
            for job in job_group.jobs:
                logger.info("Starting job.",
                            extra={"operation": job.info})

                job.shard = self.shard

                if self._fake_worker_time is None:
                    task_type = get_task_type(job.task_type,
                                              job.task_type_parameters)
                    try:
                        task_type.execute_job(job, self.file_cacher)
                    except TombstoneError:
                        job.success = False
                        job.plus = {"tombstone": True}
                else:
                    self._fake_work(job)

                logger.info("Finished job.",
                            extra={"operation": job.info})

            logger.info("Finished job group.")
            return job_group.export_to_dict()
        except Exception as e:
            err_msg = "Worker failed: %s." % e
            logger.error(err_msg, exc_info=True)
            raise JobException(err_msg)
        finally:
            self._finalize(start_time)
            self.work_lock.release()
    else:
        err_msg = "Request received, but declined because of acquired " \
                  "lock (Worker is busy executing another job; this " \
                  "should not happen: check if there is more than one " \
                  "ES running, or for bugs in ES)."
        logger.warning(err_msg)
        self._finalize(start_time)
        raise JobException(err_msg)
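# A minimal usage sketch (not part of the source) of the dict round-trip
# that execute_job_group expects: the caller exports a JobGroup to a plain
# dict, hands it to the Worker, then re-imports the returned dict to read
# the per-job results. The helper name run_group_on_worker is hypothetical.
def run_group_on_worker(worker, job_group):
    result_dict = worker.execute_job_group(job_group.export_to_dict())
    return JobGroup.import_from_dict(result_dict)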
def test_execute_job_group_tasktype_raise(self):
    """Executes a job group with two jobs raising exceptions.

    """
    jobgroup, unused_calls = TestWorker.new_jobgroup(2)
    FakeTaskType.set_results([Exception()] * 2)
    cms.service.Worker.get_task_type = Mock(return_value=FakeTaskType())

    try:
        JobGroup.import_from_dict(
            self.service.execute_job_group(jobgroup.export_to_dict()))
    except JobException:
        # Expected.
        pass
    else:
        self.fail("Expected JobException from the task type.")

    # Does not continue after failure.
    self.assertEquals(cms.service.Worker.get_task_type.call_count, 1)
def test_execute_job_group_subsequent_success(self):
    """Executes a job group with three successful jobs, then another one.

    """
    jobgroup_a, calls_a = TestWorker.new_jobgroup(3)
    FakeTaskType.set_results([True] * 3)
    cms.service.Worker.get_task_type = Mock(return_value=FakeTaskType())

    JobGroup.import_from_dict(
        self.service.execute_job_group(jobgroup_a.export_to_dict()))

    jobgroup_b, calls_b = TestWorker.new_jobgroup(3)
    FakeTaskType.set_results([True] * 3)

    JobGroup.import_from_dict(
        self.service.execute_job_group(jobgroup_b.export_to_dict()))

    cms.service.Worker.get_task_type.assert_has_calls(
        calls_a + calls_b, any_order=True)
def execute_job_group(self, job_group_dict):
    """Receive a group of jobs in dict format and execute them one
    by one.

    """
    job_group = JobGroup.import_from_dict(job_group_dict)

    if self.work_lock.acquire(False):
        try:
            self.ignore_job = False

            for k, job in job_group.jobs.iteritems():
                logger.operation = "job '%s'" % (job.info)
                logger.info("Request received.")

                job.shard = self.shard

                # FIXME This is actually kind of a workaround...
                # The only TaskType that needs it is OutputOnly.
                job._key = k

                # FIXME We're creating a new TaskType for each Job
                # even if, at the moment, a JobGroup always uses
                # the same TaskType and the same parameters. Yet,
                # this could change in the future, so the best
                # solution is to keep a cache of TaskTypes objects
                # (like ScoringService does with ScoreTypes, except
                # that we cannot index by Dataset ID here...).
                task_type = get_task_type(job.task_type,
                                          job.task_type_parameters)
                task_type.execute_job(job, self.file_cacher)

                logger.info("Request finished.")

                if not job.success or self.ignore_job:
                    job_group.success = False
                    break
            else:
                job_group.success = True

            return job_group.export_to_dict()
        except:
            err_msg = "Worker failed on operation `%s'" % logger.operation
            logger.error("%s\n%s" % (err_msg, traceback.format_exc()))
            raise JobException(err_msg)
        finally:
            logger.operation = ""
            self.work_lock.release()
    else:
        # Note: `job` is not defined in this branch (the loop above
        # never ran), so the message must not reference it.
        err_msg = "Request received, but declined because of acquired lock."
        logger.warning(err_msg)
        raise JobException(err_msg)
def test_execute_job_group_tasktype_raise(self):
    """Executes a job group with two jobs raising exceptions.

    """
    jobgroup, unused_calls = TestWorker.new_jobgroup(2)
    task_type = FakeTaskType([Exception(), Exception()])
    cms.service.Worker.get_task_type = Mock(return_value=task_type)

    try:
        JobGroup.import_from_dict(
            self.service.execute_job_group(jobgroup.export_to_dict()))
    except JobException:
        # Expected.
        pass
    else:
        self.fail("Expected JobException from the task type.")

    # Does not continue after failure, so just one call.
    self.assertEquals(cms.service.Worker.get_task_type.call_count, 1)
    self.assertEquals(task_type.call_count, 1)
def action_finished(self, data, shard, error=None):
    """Callback from a worker, to signal that it has finished some
    action (compilation or evaluation).

    data (dict): the JobGroup, exported to dict.
    shard (int): the shard finishing the action.

    """
    # We notify the pool that the worker is available again for
    # further work (no matter how the current request turned out,
    # even if the worker encountered an error). If the pool
    # informs us that the data produced by the worker has to be
    # ignored (by returning True) we interrupt the execution of
    # this method and do nothing because in that case we know the
    # operation has returned to the queue and perhaps already been
    # reassigned to another worker.
    to_ignore = self.get_executor().pool.release_worker(shard)
    if to_ignore is True:
        logger.info("Ignored result from worker %s as requested.", shard)
        return

    job_group = None
    job_group_success = True
    if error is not None:
        logger.error(
            "Received error from Worker (see above), job group lost.")
        job_group_success = False
    else:
        try:
            job_group = JobGroup.import_from_dict(data)
        except Exception:
            logger.error("Couldn't build JobGroup for data %s.", data,
                         exc_info=True)
            job_group_success = False

    if job_group_success:
        for job in job_group.jobs:
            operation = job.operation
            if job.success:
                logger.info("`%s' succeeded.", operation)
            else:
                logger.error(
                    "`%s' failed, see worker logs and (possibly) "
                    "sandboxes at '%s'.",
                    operation, " ".join(job.sandboxes))
            if isinstance(to_ignore, list) and operation in to_ignore:
                logger.info("`%s' result ignored as requested", operation)
            else:
                self.result_cache.add(operation, Result(job, job.success))
def test_execute_job_group_jobs_failure(self):
    """Executes a job group with two unsuccessful jobs.

    """
    jobgroup, unused_calls = TestWorker.new_jobgroup(2)
    FakeTaskType.set_results([False, False])
    cms.service.Worker.get_task_type = Mock(return_value=FakeTaskType())

    new_group = JobGroup.import_from_dict(
        self.service.execute_job_group(jobgroup.export_to_dict()))

    self.assertFalse(new_group.success)
    # Does not continue after failure.
    self.assertEquals(cms.service.Worker.get_task_type.call_count, 1)
def action_finished(self, data, shard, error=None):
    """Callback from a worker, to signal that it has finished some
    action (compilation or evaluation).

    data (dict): the JobGroup, exported to dict.
    shard (int): the shard finishing the action.

    """
    # We notify the pool that the worker is available again for
    # further work (no matter how the current request turned out,
    # even if the worker encountered an error). If the pool
    # informs us that the data produced by the worker has to be
    # ignored (by returning True) we interrupt the execution of
    # this method and do nothing because in that case we know the
    # operation has returned to the queue and perhaps already been
    # reassigned to another worker.
    to_ignore = self.get_executor().pool.release_worker(shard)
    if to_ignore is True:
        logger.info("Ignored result from worker %s as requested.", shard)
        return

    job_group = None
    job_group_success = True
    if error is not None:
        logger.error(
            "Received error from Worker (see above), job group lost.")
        job_group_success = False
    else:
        try:
            job_group = JobGroup.import_from_dict(data)
        except Exception:
            logger.error("Couldn't build JobGroup for data %s.", data,
                         exc_info=True)
            job_group_success = False

    if job_group_success:
        for job in job_group.jobs:
            operation = ESOperation.from_dict(job.operation)
            if job.success:
                logger.info("`%s' succeeded.", operation)
            else:
                logger.error("`%s' failed, see worker logs and "
                             "(possibly) sandboxes at '%s'.",
                             operation, " ".join(job.sandboxes))
            if isinstance(to_ignore, list) and operation in to_ignore:
                logger.info("`%s' result ignored as requested", operation)
            else:
                self.result_cache.add(operation, Result(job, job.success))
def test_execute_job_group_jobs_failure(self):
    """Executes a job group with two unsuccessful jobs.

    """
    jobgroup, unused_calls = TestWorker.new_jobgroup(2)
    task_type = FakeTaskType([False, False])
    cms.service.Worker.get_task_type = Mock(return_value=task_type)

    new_group = JobGroup.import_from_dict(
        self.service.execute_job_group(jobgroup.export_to_dict()))

    self.assertFalse(new_group.success)
    # Does not continue after failure, so just one call.
    self.assertEquals(cms.service.Worker.get_task_type.call_count, 1)
    self.assertEquals(task_type.call_count, 1)
def test_execute_job_success(self):
    """Executes three successful jobs.

    """
    n_jobs = 3
    jobs, calls = TestWorker.new_jobs(n_jobs)
    task_type = FakeTaskType([True] * n_jobs)
    cms.service.Worker.get_task_type = Mock(return_value=task_type)

    for job in jobs:
        job_group = JobGroup([job])
        ret_job_group = JobGroup.import_from_dict(
            self.service.execute_job_group(job_group.export_to_dict()))
        self.assertTrue(ret_job_group.jobs[0].success)

    cms.service.Worker.get_task_type.assert_has_calls(calls)
    self.assertEquals(task_type.call_count, n_jobs)
def test_execute_job_failure(self):
    """Executes two unsuccessful jobs.

    """
    n_jobs = 2
    jobs, unused_calls = TestWorker.new_jobs(n_jobs)
    task_type = FakeTaskType([False] * n_jobs)
    cms.service.Worker.get_task_type = Mock(return_value=task_type)

    results = []
    for job in jobs:
        job_group = JobGroup([job])
        results.append(JobGroup.import_from_dict(
            self.service.execute_job_group(job_group.export_to_dict())))

    for job_group in results:
        for job in job_group.jobs:
            self.assertFalse(job.success)
    self.assertEquals(cms.service.Worker.get_task_type.call_count, n_jobs)
    self.assertEquals(task_type.call_count, n_jobs)
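# Hypothetical sketch of the TestWorker.new_jobs helper the tests rely on:
# it returns n jobs together with the mock.call objects that get_task_type
# is expected to receive (the Worker calls get_task_type(job.task_type,
# job.task_type_parameters), so that pair is what we record). The concrete
# Job fields are assumptions; only the (jobs, calls) contract is taken
# from usage. `call` is mock.call, imported alongside Mock.
@staticmethod
def new_jobs(number_of_jobs, prefix=None):
    prefix = prefix if prefix is not None else ""
    jobs = []
    expected_calls = []
    for i in range(number_of_jobs):
        # The prefix makes jobs from different batches distinguishable,
        # which tests like test_execute_job_subsequent_locked depend on.
        job = CompilationJob(
            task_type="faketasktype%s%d" % (prefix, i),
            task_type_parameters="fakeparams")
        jobs.append(job)
        expected_calls.append(
            call(job.task_type, job.task_type_parameters))
    return jobs, expected_calls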