def test_calculate_filtered(self):
    """Tests generating metrics with only certain errors."""
    # Jobs with no ended date should not contribute to the daily metrics
    job_test_utils.create_job(status='FAILED')
    job_test_utils.create_job(status='COMPLETED')

    # Failed job with a builtin error - expected to be the single counted entry
    error1 = error_test_utils.create_error(is_builtin=True)
    job1 = job_test_utils.create_job(error=error1, status='FAILED', ended=datetime.datetime(2015, 1, 1))
    job_test_utils.create_job_exe(job=job1, error=error1, status=job1.status, ended=job1.ended)

    # Failed job with a non-builtin error - presumably filtered out of the metrics
    error2 = error_test_utils.create_error()
    job2 = job_test_utils.create_job(error=error2, status='FAILED', ended=datetime.datetime(2015, 1, 1))
    # NOTE(review): unlike job1 above, this job_exe is not linked via job=job2 - confirm intentional
    job_test_utils.create_job_exe(error=error2, status=job2.status, ended=job2.ended)

    # Completed job - should not produce an error metrics entry
    job3 = job_test_utils.create_job(status='COMPLETED', ended=datetime.datetime(2015, 1, 1))
    job_test_utils.create_job_exe(job=job3, status=job3.status, ended=job3.ended)

    MetricsError.objects.calculate(datetime.date(2015, 1, 1))

    entries = MetricsError.objects.filter(occurred=datetime.date(2015, 1, 1))
    self.assertEqual(len(entries), 1)
def setUp(self):
    """Creates one error per category for the tests to reference."""
    django.setup()

    # Need to remove initial errors loaded by fixtures
    Error.objects.all().delete()

    self.error1 = error_test_utils.create_error(category='SYSTEM')
    self.error2 = error_test_utils.create_error(category='ALGORITHM')
    self.error3 = error_test_utils.create_error(category='DATA')
def setUp(self):
    """Creates one error per category; only the SYSTEM error is builtin."""
    django.setup()

    # Need to remove initial errors loaded by fixtures
    Error.objects.all().delete()

    self.error1 = error_test_utils.create_error(category='SYSTEM', is_builtin=True)
    self.error2 = error_test_utils.create_error(category='ALGORITHM')
    self.error3 = error_test_utils.create_error(category='DATA')
def setUp(self):
    """Creates one error per category and the metrics object under test."""
    django.setup()

    self.alg_error = error_test_utils.create_error(category='ALGORITHM')
    self.data_error = error_test_utils.create_error(category='DATA')
    self.system_error = error_test_utils.create_error(category='SYSTEM')

    self.metrics = TotalJobExeMetrics(now())
def test_get_metrics_type_choices(self):
    """Tests getting the metrics type with choices."""
    # A builtin error must exist so that it shows up as a choice
    error_test_utils.create_error(is_builtin=True)

    errors_type = MetricsError.objects.get_metrics_type(include_choices=True)

    self.assertEqual(errors_type.name, 'errors')
    self.assertEqual(len(errors_type.filters), 2)
    self.assertEqual(len(errors_type.choices), 1)
def setUp(self):
    """Creates named errors used by the error interface tests."""
    django.setup()

    self.error_1 = error_test_utils.create_error(name='unknown', category='SYSTEM')
    self.error_2 = error_test_utils.create_error(name='database', category='SYSTEM')
    self.error_3 = error_test_utils.create_error(name='timeout', category='ALGORITHM')
def setUp(self):
    """Creates named errors used by the error interface tests."""
    django.setup()

    self.error_1 = error_test_utils.create_error(name='error_1', category='SYSTEM')
    self.error_2 = error_test_utils.create_error(name='error_2', category='SYSTEM')
    self.error_3 = error_test_utils.create_error(name='error_3', category='ALGORITHM')
def setUp(self):
    """Logs in a staff client and creates one error per category."""
    django.setup()

    rest.login_client(self.client, is_staff=True)

    # Need to remove initial errors loaded by fixtures
    Error.objects.all().delete()

    self.error1 = error_test_utils.create_error(category='SYSTEM', is_builtin=True)
    self.error2 = error_test_utils.create_error(category='ALGORITHM')
    self.error3 = error_test_utils.create_error(category='DATA')
def test_calculate_stats(self):
    """Tests calculating individual statistics for a metrics entry."""
    # Two failures with the same builtin error, each with a full execution timeline
    error = error_test_utils.create_error(is_builtin=True)
    job1 = job_test_utils.create_job(error=error, status='FAILED', ended=datetime.datetime(2015, 1, 1))
    job_test_utils.create_job_exe(
        job=job1, error=error, status=job1.status,
        queued=datetime.datetime(2015, 1, 1, tzinfo=timezone.utc),
        started=datetime.datetime(2015, 1, 1, 0, 10, 2, tzinfo=timezone.utc),
        pre_started=datetime.datetime(2015, 1, 1, 0, 30, 4, tzinfo=timezone.utc),
        pre_completed=datetime.datetime(2015, 1, 1, 1, 6, tzinfo=timezone.utc),
        job_started=datetime.datetime(2015, 1, 1, 1, 40, 8, tzinfo=timezone.utc),
        job_completed=datetime.datetime(2015, 1, 1, 2, 30, 10, tzinfo=timezone.utc),
        post_started=datetime.datetime(2015, 1, 1, 3, 30, 12, tzinfo=timezone.utc),
        post_completed=datetime.datetime(2015, 1, 1, 4, 40, 14, tzinfo=timezone.utc),
        ended=datetime.datetime(2015, 1, 1, 6, 0, 16, tzinfo=timezone.utc),
    )
    job2 = job_test_utils.create_job(error=error, status='FAILED', ended=datetime.datetime(2015, 1, 1))
    job_test_utils.create_job_exe(
        job=job2, error=error, status=job2.status,
        queued=datetime.datetime(2015, 1, 1, tzinfo=timezone.utc),
        started=datetime.datetime(2015, 1, 1, 2, 10, 2, tzinfo=timezone.utc),
        pre_started=datetime.datetime(2015, 1, 1, 4, 30, 4, tzinfo=timezone.utc),
        pre_completed=datetime.datetime(2015, 1, 1, 6, 0, 8, tzinfo=timezone.utc),
        job_started=datetime.datetime(2015, 1, 1, 8, 40, 14, tzinfo=timezone.utc),
        job_completed=datetime.datetime(2015, 1, 1, 10, 30, 22, tzinfo=timezone.utc),
        post_started=datetime.datetime(2015, 1, 1, 12, 30, 32, tzinfo=timezone.utc),
        post_completed=datetime.datetime(2015, 1, 1, 14, 40, 44, tzinfo=timezone.utc),
        ended=datetime.datetime(2015, 1, 1, 16, 0, 58, tzinfo=timezone.utc),
    )

    # One failure per remaining category, each with its own builtin error
    sys_error = error_test_utils.create_error(category='SYSTEM', is_builtin=True)
    job3a = job_test_utils.create_job(error=sys_error, status='FAILED', ended=datetime.datetime(2015, 1, 1))
    job_test_utils.create_job_exe(job=job3a, status=job3a.status, ended=job3a.ended, error=sys_error)

    data_error = error_test_utils.create_error(category='DATA', is_builtin=True)
    job3b = job_test_utils.create_job(error=data_error, status='FAILED', ended=datetime.datetime(2015, 1, 1))
    job_test_utils.create_job_exe(job=job3b, status=job3b.status, ended=job3b.ended, error=data_error)

    algo_error = error_test_utils.create_error(category='ALGORITHM', is_builtin=True)
    job3c = job_test_utils.create_job(error=algo_error, status='FAILED', ended=datetime.datetime(2015, 1, 1))
    job_test_utils.create_job_exe(job=job3c, status=job3c.status, ended=job3c.ended, error=algo_error)

    MetricsError.objects.calculate(datetime.date(2015, 1, 1))

    # One entry per distinct error: the shared error counts 2, the rest count 1
    entries = MetricsError.objects.filter(occurred=datetime.date(2015, 1, 1))
    self.assertEqual(len(entries), 4)

    for entry in entries:
        self.assertEqual(entry.occurred, datetime.date(2015, 1, 1))
        if entry.error == error:
            self.assertEqual(entry.total_count, 2)
        else:
            self.assertEqual(entry.total_count, 1)
def setUp(self):
    """Creates named errors and resets the error cache."""
    django.setup()

    self.error_1 = error_test_utils.create_error(name='error_1', category='SYSTEM')
    self.error_2 = error_test_utils.create_error(name='error_2', category='SYSTEM')
    self.error_3 = error_test_utils.create_error(name='error_3', category='ALGORITHM')

    # Clear error cache so tests work correctly
    reset_error_cache()
def setUp(self):
    """Creates a scheduler, three nodes and a series of job executions with staggered timestamps."""
    django.setup()

    self.scheduler = Scheduler.objects.create(id=1, master_hostname='master', master_port=5050)

    self.node1 = node_test_utils.create_node()
    self.node2 = node_test_utils.create_node()
    self.node3 = node_test_utils.create_node()

    self.job = job_test_utils.create_job(status='COMPLETED')

    data_error = error_test_utils.create_error(category='DATA')
    system_error = error_test_utils.create_error(category='SYSTEM')

    # Timestamps are assigned after creation and saved, presumably to override
    # auto-populated fields on the model - two failures on node2, 3-2 hours ago
    job_exe_1 = job_test_utils.create_job_exe(job=self.job, status='FAILED', error=data_error, node=self.node2)
    job_exe_1.created = now() - timedelta(hours=3)
    job_exe_1.job_completed = now() - timedelta(hours=2)
    job_exe_1.save()
    job_exe_2 = job_test_utils.create_job_exe(job=self.job, status='FAILED', error=system_error, node=self.node2)
    job_exe_2.created = now() - timedelta(hours=3)
    job_exe_2.job_completed = now() - timedelta(hours=2)
    job_exe_2.save()

    # One system failure on node1, 2-1 hours ago
    job_exe_3 = job_test_utils.create_job_exe(job=self.job, status='FAILED', error=system_error, node=self.node1)
    job_exe_3.created = now() - timedelta(hours=2)
    job_exe_3.job_completed = now() - timedelta(hours=1)
    job_exe_3.save()

    # One success on node1, finished just now
    job_exe_4 = job_test_utils.create_job_exe(job=self.job, status='COMPLETED', node=self.node1)
    job_exe_4.created = now() - timedelta(hours=1)
    job_exe_4.job_completed = now()
    job_exe_4.save()

    # One still-running execution on node3
    job_exe_5 = job_test_utils.create_job_exe(job=self.job, status='RUNNING', node=self.node3)
    job_exe_5.created = now()
    job_exe_5.save()
def test_requeue_failed(self,):
    """Tests calling the requeue view successfully for a job that was previously queued."""
    job_test_utils.create_job_exe(job=self.job_2, status='FAILED')
    job_test_utils.create_job_exe(job=self.job_2, status='FAILED')
    # make sure the job is in the right state despite not actually having been run
    Job.objects.update_status([self.job_2], 'FAILED', timezone.now(), error_test_utils.create_error())
    self.job_2.num_exes = 2
    self.job_2.save()

    base_count = Queue.objects.count()

    json_data = {
        'job_id': self.job_2.id,
    }
    url = '/queue/requeue-job/'
    response = self.client.post(url, json.dumps(json_data), 'application/json')
    result = json.loads(response.content)

    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertEqual(result['id'], self.job_2.id)
    self.assertEqual(result['status'], 'QUEUED')
    # The requeue adds a third execution on top of the two created above
    self.assertEqual(len(result['job_exes']), 3)

    # NOTE(review): 'job' is fetched but never asserted on - presumably left over from a removed check
    job = Job.objects.get(id=self.job_2.id)
    self.assertEqual(Queue.objects.count() - base_count, 1)
def test_json(self):
    """Tests converting a RequeueJobsBulk message to and from JSON"""
    sys_err = error_test_utils.create_error(category='SYSTEM')
    data = JobData()
    job_type = job_test_utils.create_job_type()
    # Only job_1 matches all of the message filters below (FAILED status)
    job_1 = job_test_utils.create_job(job_type=job_type, num_exes=3, status='FAILED', error=sys_err,
                                      input=data.get_dict())
    job_2 = job_test_utils.create_job(job_type=job_type, num_exes=3, status='CANCELED', error=sys_err,
                                      input=data.get_dict())

    # Create message
    message = RequeueJobsBulk()
    message.started = job_1.last_modified - timedelta(seconds=1)
    message.ended = job_1.last_modified + timedelta(seconds=1)
    message.error_categories = ['SYSTEM']
    message.error_ids = [sys_err.id]
    message.job_ids = [job_1.id]
    message.job_type_ids = [job_type.id]
    message.priority = 1
    message.status = 'FAILED'

    # Convert message to JSON and back, and then execute
    message_json_dict = message.to_json()
    new_message = RequeueJobsBulk.from_json(message_json_dict)
    result = new_message.execute()
    self.assertTrue(result)

    # Should be one re-queue message for job 1
    self.assertEqual(len(new_message.new_messages), 1)
    message = new_message.new_messages[0]
    self.assertEqual(message.type, 'requeue_jobs')
    self.assertListEqual(message._requeue_jobs, [QueuedJob(job_1.id, job_1.num_exes)])
    self.assertEqual(message.priority, 1)
def test_get_plot_data_filtered(self):
    """Tests getting the metrics plot data with filters."""
    error = error_test_utils.create_error(is_builtin=True)
    # Two entries for the chosen error (one inside, one outside the date range)
    metrics_test_utils.create_error(error=error, occurred=datetime.datetime(2015, 1, 1, tzinfo=utc), total_count=1)
    metrics_test_utils.create_error(error=error, occurred=datetime.datetime(2015, 1, 20, tzinfo=utc), total_count=1)
    # Entry for a different error - excluded by choice_ids
    metrics_test_utils.create_error(occurred=datetime.datetime(2015, 1, 1, tzinfo=utc), total_count=1)

    plot_data = MetricsError.objects.get_plot_data(
        started=datetime.datetime(2015, 1, 1, tzinfo=utc),
        ended=datetime.datetime(2015, 1, 10, tzinfo=utc),
        choice_ids=[error.id],
        columns=[MetricsTypeColumn('total_count')])

    # One column requested, and only one entry falls inside the range for the chosen error
    self.assertEqual(len(plot_data), 1)
    self.assertEqual(len(plot_data[0].values), 1)
def test_priority(self):
    """Tests successfully calling the requeue view changing the job priority."""
    job_test_utils.create_job_exe(job=self.job_2, status='FAILED')
    job_test_utils.create_job_exe(job=self.job_2, status='FAILED')
    # make sure the job is in the right state despite not actually having been run
    Job.objects.update_status([self.job_2], 'FAILED', timezone.now(), error_test_utils.create_error())
    self.job_2.num_exes = 2
    self.job_2.save()

    json_data = {
        'job_ids': [self.job_2.id],
        'priority': 123,
    }
    url = rest_util.get_url('/queue/requeue-jobs/')
    response = self.client.post(url, json.dumps(json_data), 'application/json')
    self.assertEqual(response.status_code, status.HTTP_200_OK, response.content)

    result = json.loads(response.content)
    self.assertEqual(len(result['results']), 1)
    self.assertEqual(result['results'][0]['id'], self.job_2.id)
    self.assertEqual(result['results'][0]['status'], 'QUEUED')
    # The requested priority override should be reflected in the response
    self.assertEqual(result['results'][0]['priority'], 123)
def test_priority(self):
    """Tests successfully calling the requeue view changing the job priority."""
    job_test_utils.create_job_exe(job=self.job_2, status='FAILED')
    job_test_utils.create_job_exe(job=self.job_2, status='FAILED')
    # make sure the job is in the right state despite not actually having been run
    Job.objects.update_status([self.job_2], 'FAILED', timezone.now(), error_test_utils.create_error())
    self.job_2.num_exes = 2
    self.job_2.save()

    payload = {
        'job_ids': [self.job_2.id],
        'priority': 123,
    }
    response = self.client.post('/queue/requeue-jobs/', json.dumps(payload), 'application/json')
    result = json.loads(response.content)

    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertEqual(len(result['results']), 1)
    requeued = result['results'][0]
    self.assertEqual(requeued['id'], self.job_2.id)
    self.assertEqual(requeued['status'], 'QUEUED')
    self.assertEqual(requeued['priority'], 123)
def test_requeue_failed(self,):
    """Tests calling the requeue view successfully for a job that was previously queued."""
    job_test_utils.create_job_exe(job=self.job_2, status='FAILED')
    job_test_utils.create_job_exe(job=self.job_2, status='FAILED')
    # make sure the job is in the right state despite not actually having been run
    Job.objects.update_status([self.job_2], 'FAILED', timezone.now(), error_test_utils.create_error())
    self.job_2.num_exes = 2
    self.job_2.save()

    base_count = Queue.objects.count()

    json_data = {
        'job_ids': [self.job_2.id],
    }
    url = rest_util.get_url('/queue/requeue-jobs/')
    response = self.client.post(url, json.dumps(json_data), 'application/json')
    self.assertEqual(response.status_code, status.HTTP_200_OK, response.content)

    result = json.loads(response.content)
    self.assertEqual(len(result['results']), 1)
    self.assertEqual(result['results'][0]['id'], self.job_2.id)
    self.assertEqual(result['results'][0]['status'], 'QUEUED')
    # Exactly one new queue entry should have been created
    self.assertEqual(Queue.objects.count() - base_count, 1)
def test_calculate_stats(self):
    """Tests calculating individual statistics for a metrics entry."""
    # Two failures with the same builtin error
    error = error_test_utils.create_error(is_builtin=True)
    job1 = job_test_utils.create_job(error=error, status='FAILED', ended=datetime.datetime(2015, 1, 1, tzinfo=utc))
    job_test_utils.create_job_exe(
        job=job1, error=error, status=job1.status,
        queued=datetime.datetime(2015, 1, 1, tzinfo=utc),
        started=datetime.datetime(2015, 1, 1, 0, 10, 2, tzinfo=utc),
        ended=datetime.datetime(2015, 1, 1, 6, 0, 16, tzinfo=utc),
    )
    job2 = job_test_utils.create_job(error=error, status='FAILED', ended=datetime.datetime(2015, 1, 1, tzinfo=utc))
    job_test_utils.create_job_exe(
        job=job2, error=error, status=job2.status,
        queued=datetime.datetime(2015, 1, 1, tzinfo=utc),
        started=datetime.datetime(2015, 1, 1, 2, 10, 2, tzinfo=utc),
        ended=datetime.datetime(2015, 1, 1, 16, 0, 58, tzinfo=utc),
    )

    # One failure per remaining category, each with its own builtin error
    sys_error = error_test_utils.create_error(category='SYSTEM', is_builtin=True)
    job3a = job_test_utils.create_job(error=sys_error, status='FAILED',
                                      ended=datetime.datetime(2015, 1, 1, tzinfo=utc))
    job_test_utils.create_job_exe(job=job3a, status=job3a.status, ended=job3a.ended, error=sys_error)

    data_error = error_test_utils.create_error(category='DATA', is_builtin=True)
    job3b = job_test_utils.create_job(error=data_error, status='FAILED',
                                      ended=datetime.datetime(2015, 1, 1, tzinfo=utc))
    job_test_utils.create_job_exe(job=job3b, status=job3b.status, ended=job3b.ended, error=data_error)

    algo_error = error_test_utils.create_error(category='ALGORITHM', is_builtin=True)
    job3c = job_test_utils.create_job(error=algo_error, status='FAILED',
                                      ended=datetime.datetime(2015, 1, 1, tzinfo=utc))
    job_test_utils.create_job_exe(job=job3c, status=job3c.status, ended=job3c.ended, error=algo_error)

    MetricsError.objects.calculate(datetime.date(2015, 1, 1))

    # One entry per distinct error: the shared error counts 2, the rest count 1
    entries = MetricsError.objects.filter(occurred=datetime.date(2015, 1, 1))
    self.assertEqual(len(entries), 4)

    for entry in entries:
        self.assertEqual(entry.occurred, datetime.date(2015, 1, 1))
        if entry.error == error:
            self.assertEqual(entry.total_count, 2)
        else:
            self.assertEqual(entry.total_count, 1)
def test_calculate_repeated(self):
    """Tests regenerating metrics for a date that already has metrics."""
    builtin_error = error_test_utils.create_error(is_builtin=True)
    failed_job = job_test_utils.create_job(status='FAILED', error=builtin_error,
                                           ended=datetime.datetime(2015, 1, 1, tzinfo=utc))
    job_test_utils.create_job_exe(job=failed_job, error=builtin_error, status=failed_job.status,
                                  ended=failed_job.ended)

    # Running the calculation twice for the same day must not duplicate entries
    for _ in range(2):
        MetricsError.objects.calculate(datetime.date(2015, 1, 1))

    entries = MetricsError.objects.filter(occurred=datetime.date(2015, 1, 1))
    self.assertEqual(len(entries), 1)
def test_calculate_filtered(self):
    """Tests generating metrics with only certain errors."""
    # Jobs with no ended date should not contribute to the daily metrics
    job_test_utils.create_job(status='FAILED')
    job_test_utils.create_job(status='COMPLETED')

    # Failed job with a builtin error - expected to be the single counted entry
    error1 = error_test_utils.create_error(is_builtin=True)
    job1 = job_test_utils.create_job(error=error1, status='FAILED', ended=datetime.datetime(2015, 1, 1))
    job_test_utils.create_job_exe(job=job1, error=error1, status=job1.status, ended=job1.ended)

    # Failed job with a non-builtin error - presumably filtered out of the metrics
    error2 = error_test_utils.create_error()
    job2 = job_test_utils.create_job(error=error2, status='FAILED', ended=datetime.datetime(2015, 1, 1))
    # NOTE(review): unlike job1 above, this job_exe is not linked via job=job2 - confirm intentional
    job_test_utils.create_job_exe(error=error2, status=job2.status, ended=job2.ended)

    # Completed job - should not produce an error metrics entry
    job3 = job_test_utils.create_job(status='COMPLETED', ended=datetime.datetime(2015, 1, 1))
    job_test_utils.create_job_exe(job=job3, status=job3.status, ended=job3.ended)

    MetricsError.objects.calculate(datetime.date(2015, 1, 1))

    entries = MetricsError.objects.filter(occurred=datetime.date(2015, 1, 1))
    self.assertEqual(len(entries), 1)
def test_calculate_repeated(self):
    """Tests regenerating metrics for a date that already has metrics."""
    error = error_test_utils.create_error(is_builtin=True)
    job = job_test_utils.create_job(status='FAILED', error=error, ended=datetime.datetime(2015, 1, 1))
    job_test_utils.create_job_exe(job=job, error=error, status=job.status, ended=job.ended)

    # Calculating the same day twice must not duplicate entries
    MetricsError.objects.calculate(datetime.date(2015, 1, 1))
    MetricsError.objects.calculate(datetime.date(2015, 1, 1))

    entries = MetricsError.objects.filter(occurred=datetime.date(2015, 1, 1))
    self.assertEqual(len(entries), 1)
def test_update_status_failed(self):
    """Tests that job attributes are updated when a job is failed."""
    job = job_test_utils.create_job(num_exes=1, started=timezone.now(), ended=timezone.now())
    error = error_test_utils.create_error()

    # FAILED without an error, or an error with a non-FAILED status, must raise
    self.assertRaises(Exception, Job.objects.update_status, [job], 'FAILED', timezone.now())
    self.assertRaises(Exception, Job.objects.update_status, [job], 'RUNNING', timezone.now(), error)

    Job.objects.update_status([job], 'FAILED', timezone.now(), error)
    self.assertEqual(job.status, 'FAILED')
    self.assertIsNotNone(job.ended)
def create_error(error=None, occurred=None, **kwargs):
    """Creates a metrics error model for unit testing

    :param error: The error the metrics entry is for (a builtin error is created if omitted)
    :param occurred: When the metrics entry occurred (defaults to now)
    :returns: The metrics error model
    :rtype: :class:`metrics.models.MetricsError`
    """

    if not error:
        error = error_test_utils.create_error(is_builtin=True)
    if not occurred:
        occurred = timezone.now()

    return MetricsError.objects.create(error=error, occurred=occurred, **kwargs)
def create_error(error=None, occurred=None, **kwargs):
    '''Creates a metrics error model for unit testing

    :param error: The error the metrics entry is for (a builtin error is created if omitted)
    :param occurred: When the metrics entry occurred (defaults to now)
    :returns: The metrics error model
    :rtype: :class:`metrics.models.MetricsError`
    '''

    if not error:
        error = error_test_utils.create_error(is_builtin=True)
    if not occurred:
        occurred = timezone.now()

    return MetricsError.objects.create(error=error, occurred=occurred, **kwargs)
def setUp(self):
    """Creates one error per category for the tests to reference."""
    django.setup()

    # Need to remove initial errors loaded by fixtures
    Error.objects.all().delete()

    error_test_utils.create_error(category='SYSTEM')
    error_test_utils.create_error(category='ALGORITHM')
    error_test_utils.create_error(category='DATA')
def setUp(self):
    """Creates one error per category; only the SYSTEM error is builtin."""
    django.setup()

    # Need to remove initial errors loaded by fixtures
    Error.objects.all().delete()

    error_test_utils.create_error(category='SYSTEM', is_builtin=True)
    error_test_utils.create_error(category='ALGORITHM')
    error_test_utils.create_error(category='DATA')
def test_execute(self):
    """Tests calling CancelJobsBulk.execute() successfully"""

    # Importing module here to patch the max batch size
    import job.messages.cancel_jobs_bulk
    job.messages.cancel_jobs_bulk.MAX_BATCH_SIZE = 5

    sys_err = error_test_utils.create_error(category='SYSTEM')
    job_type = job_test_utils.create_seed_job_type()
    job_1 = job_test_utils.create_job(job_type=job_type, num_exes=3, status='FAILED', error=sys_err)
    job_2 = job_test_utils.create_job(job_type=job_type, num_exes=3, status='FAILED', error=sys_err)
    job_3 = job_test_utils.create_job(job_type=job_type, num_exes=1, status='COMPLETED')
    job_4 = job_test_utils.create_job(job_type=job_type, status='BLOCKED')
    job_5 = job_test_utils.create_job(job_type=job_type, num_exes=3, status='CANCELED')
    job_6 = job_test_utils.create_job(job_type=job_type, status='PENDING')
    job_7 = job_test_utils.create_job(job_type=job_type, num_exes=3, status='FAILED', error=sys_err)

    # Create message
    message = job.messages.cancel_jobs_bulk.CancelJobsBulk()
    message.job_type_ids = [job_type.id]

    # Execute message
    result = message.execute()
    self.assertTrue(result)

    # Should be two messages, one for next bulk cancel and one for canceling the specific jobs
    self.assertEqual(len(message.new_messages), 2)
    cancel_bulk_message = message.new_messages[0]
    cancel_message = message.new_messages[1]
    self.assertEqual(cancel_bulk_message.type, 'cancel_jobs_bulk')
    self.assertEqual(cancel_bulk_message.current_job_id, job_3.id)
    self.assertEqual(cancel_message.type, 'cancel_jobs')
    # Job 5 is skipped due to being CANCELED and job 3 is skipped due to being COMPLETED
    self.assertListEqual(cancel_message._job_ids, [job_7.id, job_6.id, job_4.id])

    # Test executing message again
    message.new_messages = []
    result = message.execute()
    self.assertTrue(result)

    # Should have same messages returned
    self.assertEqual(len(message.new_messages), 2)
    cancel_bulk_message = message.new_messages[0]
    cancel_message = message.new_messages[1]
    self.assertEqual(cancel_bulk_message.type, 'cancel_jobs_bulk')
    self.assertEqual(cancel_bulk_message.current_job_id, job_3.id)
    self.assertEqual(cancel_message.type, 'cancel_jobs')
    # Job 5 is skipped due to being CANCELED and job 3 is skipped due to being COMPLETED
    self.assertListEqual(cancel_message._job_ids, [job_7.id, job_6.id, job_4.id])
def test_get_plot_data_filtered(self):
    """Tests getting the metrics plot data with filters."""
    error = error_test_utils.create_error(is_builtin=True)
    # Two entries for the chosen error (one inside, one outside the date range)
    metrics_test_utils.create_error(error=error, occurred=datetime.date(2015, 1, 1), total_count=1)
    metrics_test_utils.create_error(error=error, occurred=datetime.date(2015, 1, 20), total_count=1)
    # Entry for a different error - excluded by choice_ids
    metrics_test_utils.create_error(occurred=datetime.date(2015, 1, 1), total_count=1)

    plot_data = MetricsError.objects.get_plot_data(started=datetime.date(2015, 1, 1),
                                                   ended=datetime.date(2015, 1, 10),
                                                   choice_ids=[error.id],
                                                   columns=[MetricsTypeColumn('total_count')])

    # One column requested, and only one entry falls inside the range for the chosen error
    self.assertEqual(len(plot_data), 1)
    self.assertEqual(len(plot_data[0].values), 1)
def setUp(self):
    """Creates a scheduler, three nodes and job executions with staggered timestamps."""
    django.setup()

    self.scheduler = Scheduler.objects.create(id=1, master_hostname='master', master_port=5050)

    self.node1 = node_test_utils.create_node()
    self.node2 = node_test_utils.create_node()
    self.node3 = node_test_utils.create_node()

    self.job = job_test_utils.create_job(status=u'COMPLETED')

    data_error = error_test_utils.create_error(category=u'DATA')
    system_error = error_test_utils.create_error(category=u'SYSTEM')

    # Two failures on node2, 3-2 hours ago
    job_test_utils.create_job_exe(job=self.job, status=u'FAILED', error=data_error, node=self.node2,
                                  created=now() - timedelta(hours=3), job_completed=now() - timedelta(hours=2))
    job_test_utils.create_job_exe(job=self.job, status=u'FAILED', error=system_error, node=self.node2,
                                  created=now() - timedelta(hours=3), job_completed=now() - timedelta(hours=2))
    # One system failure on node1, 2-1 hours ago
    job_test_utils.create_job_exe(job=self.job, status=u'FAILED', error=system_error, node=self.node1,
                                  created=now() - timedelta(hours=2), job_completed=now() - timedelta(hours=1))
    # One success on node1, finished just now
    job_test_utils.create_job_exe(job=self.job, status=u'COMPLETED', node=self.node1,
                                  created=now() - timedelta(hours=1), job_completed=now())
    # One still-running execution on node3
    job_test_utils.create_job_exe(job=self.job, status=u'RUNNING', node=self.node3, created=now())
def test_json(self):
    """Tests converting a CancelJobsBulk message to and from JSON"""
    sys_err = error_test_utils.create_error(category='SYSTEM')
    data = JobData()
    batch = batch_test_utils.create_batch()
    recipe = recipe_test_utils.create_recipe()
    job_type = job_test_utils.create_job_type()
    # Only job_1 is attached to the batch and recipe that the message filters on
    job_1 = job_test_utils.create_job(job_type=job_type, num_exes=3, status='FAILED', error=sys_err,
                                      input=data.get_dict())
    job_1.batch_id = batch.id
    job_1.recipe_id = recipe.id
    job_1.save()
    job_2 = job_test_utils.create_job(job_type=job_type, num_exes=3, status='FAILED', error=sys_err,
                                      input=data.get_dict())

    # Create message
    message = CancelJobsBulk()
    message.started = job_1.last_modified - timedelta(seconds=1)
    message.ended = job_1.last_modified + timedelta(seconds=1)
    message.error_categories = ['SYSTEM']
    message.error_ids = [sys_err.id]
    message.job_ids = [job_1.id]
    message.job_type_ids = [job_type.id]
    message.status = 'FAILED'
    message.job_type_names = [job_type.name]
    message.batch_ids = [batch.id]
    message.recipe_ids = [recipe.id]
    message.is_superseded = False

    # Convert message to JSON and back, and then execute
    message_json_dict = message.to_json()
    new_message = CancelJobsBulk.from_json(message_json_dict)
    result = new_message.execute()
    self.assertTrue(result)

    # Should be one cancel message for job 1
    self.assertEqual(len(new_message.new_messages), 1)
    message = new_message.new_messages[0]
    self.assertEqual(message.type, 'cancel_jobs')
    self.assertListEqual(message._job_ids, [job_1.id])
def test_error_categories(self):
    """Tests successfully calling the requeue view filtered by job error category."""
    error = error_test_utils.create_error(category="DATA")
    job = job_test_utils.create_job(error=error)

    json_data = {"error_categories": [error.category]}
    url = rest_util.get_url("/queue/requeue-jobs/")
    response = self.client.post(url, json.dumps(json_data), "application/json")
    self.assertEqual(response.status_code, status.HTTP_200_OK, response.content)

    # Only the job whose error matches the requested category should be requeued
    result = json.loads(response.content)
    self.assertEqual(len(result["results"]), 1)
    self.assertEqual(result["results"][0]["id"], job.id)
    self.assertEqual(result["results"][0]["error"]["category"], error.category)
def test_error_category(self):
    """Tests successfully calling the source jobs view filtered by error category."""
    # Local import, presumably to avoid a circular dependency at module load
    from product.test import utils as product_test_utils
    error = error_test_utils.create_error(category='DATA')
    job = job_test_utils.create_job(error=error)
    job_exe = job_test_utils.create_job_exe(job=job)
    # Link the job's products back to the source file under test
    product_test_utils.create_file_link(ancestor=self.src_file, job=job, job_exe=job_exe)

    url = rest_util.get_url('/sources/%d/jobs/?error_category=%s' % (self.src_file.id, error.category))
    response = self.client.generic('GET', url)
    self.assertEqual(response.status_code, status.HTTP_200_OK, response.content)

    result = json.loads(response.content)
    self.assertEqual(len(result['results']), 1)
    self.assertEqual(result['results'][0]['id'], job.id)
    self.assertEqual(result['results'][0]['error']['category'], error.category)
def setUp(self):
    """Logs in a staff client and creates one error per category."""
    django.setup()

    rest.login_client(self.client, is_staff=True)

    # Need to remove initial errors loaded by fixtures
    Error.objects.all().delete()

    error_test_utils.create_error(category='SYSTEM', is_builtin=True)
    error_test_utils.create_error(category='ALGORITHM')
    error_test_utils.create_error(name='data', category='DATA', job_type_name='type-1')
def setUp(self):
    """Creates a workspace, an error, a job interface/error mapping and trigger rules."""
    django.setup()

    self.workspace = storage_test_utils.create_workspace()
    self.error = error_test_utils.create_error()

    # Minimal job interface with one file input and one files output
    self.interface = {
        'version': '1.0',
        'command': 'my_command',
        'command_arguments': 'args',
        'input_data': [{
            'name': 'Test Input 1',
            'type': 'file',
            'media_types': ['text/plain'],
        }],
        'output_data': [{
            'name': 'Test Output 1',
            'type': 'files',
            'media_type': 'image/png',
        }]}
    self.job_interface = JobInterface(self.interface)

    # Map exit code 1 to the created error
    self.error_mapping = ErrorInterface({
        'version': '1.0',
        'exit_codes': {
            '1': self.error.name,
        }
    })

    self.configuration = {
        'version': '1.0',
        'condition': {
            'media_type': 'text/plain'
        },
        'data': {
            'input_data_name': 'Test Input 1',
            'workspace_name': self.workspace.name
        }
    }
    self.trigger_config = job_test_utils.MockTriggerRuleConfiguration(job_test_utils.MOCK_TYPE, self.configuration)
    self.trigger_rule = trigger_test_utils.create_trigger_rule(trigger_type=job_test_utils.MOCK_TYPE,
                                                               configuration=self.trigger_config.get_dict())
    self.invalid_trigger_config = job_test_utils.MockErrorTriggerRuleConfiguration(job_test_utils.MOCK_ERROR_TYPE,
                                                                                   self.configuration)
    # NOTE(review): this uses self.trigger_config (the valid config), not
    # self.invalid_trigger_config - confirm this is intentional
    self.invalid_trigger_rule = trigger_test_utils.create_trigger_rule(trigger_type=job_test_utils.MOCK_ERROR_TYPE,
                                                                       configuration=self.trigger_config.get_dict())
def test_error_categories(self):
    """Tests successfully calling the requeue view filtered by job error category."""
    data_error = error_test_utils.create_error(category='DATA')
    failed_job = job_test_utils.create_job(error=data_error)

    payload = {
        'error_categories': [data_error.category],
    }
    response = self.client.post('/queue/requeue-jobs/', json.dumps(payload), 'application/json')
    result = json.loads(response.content)

    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertEqual(len(result['results']), 1)
    entry = result['results'][0]
    self.assertEqual(entry['id'], failed_job.id)
    self.assertEqual(entry['error']['category'], data_error.category)
def test_error_categories(self):
    """Tests successfully calling the requeue view filtered by job error category."""
    error = error_test_utils.create_error(category='DATA')
    job = job_test_utils.create_job(error=error)

    json_data = {
        'error_categories': [error.category],
    }
    url = rest_util.get_url('/queue/requeue-jobs/')
    response = self.client.post(url, json.dumps(json_data), 'application/json')
    self.assertEqual(response.status_code, status.HTTP_200_OK, response.content)

    # Only the job whose error matches the requested category should be requeued
    result = json.loads(response.content)
    self.assertEqual(len(result['results']), 1)
    self.assertEqual(result['results'][0]['id'], job.id)
    self.assertEqual(result['results'][0]['error']['category'], error.category)
def test_successful(self):
    """Tests calling QueueManager.requeue_existing_job() successfully."""
    job = job_test_utils.create_job(
        job_type=self.job_type, status="FAILED", error=error_test_utils.create_error(),
        data=self.data, num_exes=1
    )
    old_max_tries = job.max_tries

    job_exe = Queue.objects.requeue_existing_job(job.id)

    # Make sure processor was called
    self.assertTrue(self.mock_processor.process_queued.called_with(job_exe, False))

    # Make sure the job attributes were updated (must refresh the model first)
    job = Job.objects.get(pk=job.id)
    self.assertGreater(job.max_tries, old_max_tries)
    self.assertIsNone(job.error)

    # Make sure a job execution was queued
    self.assertTrue(Queue.objects.get(job_exe=job_exe))
def test_get_error_missing_default(self): """Tests that custom error is returned when a non-registered name is found in the mapping""" # Clear error cache so test works correctly CACHED_BUILTIN_ERRORS.clear() error_interface_dict = { 'version': '1.0', 'exit_codes': { '1': self.error_1.name, '2': self.error_2.name, '3': self.error_3.name, }, } default_error = error_test_utils.create_error() error_interface = ErrorInterface(error_interface_dict) error = error_interface.get_error(4, default_error.name) self.assertIsNotNone(error) self.assertEqual(error.name, default_error.name)
def test_failed_normal_job_execution(self):
    """Tests running through a normal job execution that fails"""
    job_exe = JobExecution.objects.get_job_exe_with_job_and_job_type(self._job_exe_id)
    error = error_test_utils.create_error()

    running_job_exe = RunningJobExecution(job_exe)
    self.assertFalse(running_job_exe.is_finished())
    self.assertTrue(running_job_exe.is_next_task_ready())

    # Start pre-task
    task = running_job_exe.start_next_task()
    pre_task_id = task.id
    self.assertFalse(running_job_exe.is_finished())
    self.assertFalse(running_job_exe.is_next_task_ready())

    # Pre-task running
    pre_task_started = now() - timedelta(minutes=5)  # Lots of time so now() called at completion is in future
    running_job_exe.task_start(pre_task_id, pre_task_started)
    self.assertFalse(running_job_exe.is_finished())
    self.assertFalse(running_job_exe.is_next_task_ready())

    # Fail pre-task - a failing pre-task finishes the whole execution
    pre_task_failed = pre_task_started + timedelta(seconds=1)
    pre_task_results = TaskResults(pre_task_id)
    pre_task_results.exit_code = 1
    pre_task_results.when = pre_task_failed
    running_job_exe.task_fail(pre_task_results, error)
    self.assertTrue(running_job_exe.is_finished())
    self.assertFalse(running_job_exe.is_next_task_ready())

    # Verify the failure state was persisted to the database
    job_exe = JobExecution.objects.get(id=self._job_exe_id)
    self.assertEqual(pre_task_started, job_exe.pre_started)
    self.assertEqual(pre_task_failed, job_exe.pre_completed)
    self.assertEqual(1, job_exe.pre_exit_code)
    self.assertEqual('FAILED', job_exe.status)
    self.assertEqual(error.id, job_exe.error_id)
    self.assertGreater(job_exe.ended, pre_task_failed)
def test_get_error_missing_default(self):
    """Tests that the custom default error is returned when an exit code is not in the mapping."""

    exit_code_mapping = {
        'version': '1.0',
        'exit_codes': {
            '1': self.error_1.name,
            '2': self.error_2.name,
            '3': self.error_3.name,
        },
    }

    # Mark the fallback error as builtin before resetting the cache
    default_error = error_test_utils.create_error()
    default_error.is_builtin = True
    default_error.save()

    # Reset error cache so tests work correctly
    reset_error_cache()

    interface = ErrorInterface(exit_code_mapping)
    # Exit code 4 is unmapped, so the named default should come back
    found = interface.get_error(4, default_error.name)

    self.assertIsNotNone(found)
    self.assertEqual(found.name, default_error.name)
def test_priority(self):
    """Tests successfully calling the requeue view changing the job priority."""

    job_test_utils.create_job_exe(job=self.job_2, status="FAILED")
    job_test_utils.create_job_exe(job=self.job_2, status="FAILED")

    # make sure the job is in the right state despite not actually having been run
    Job.objects.update_status([self.job_2], "FAILED", timezone.now(), error_test_utils.create_error())
    self.job_2.num_exes = 2
    self.job_2.save()

    # Requeue the job while overriding its priority
    payload = {"job_ids": [self.job_2.id], "priority": 123}
    requeue_url = rest_util.get_url("/queue/requeue-jobs/")
    response = self.client.post(requeue_url, json.dumps(payload), "application/json")
    self.assertEqual(response.status_code, status.HTTP_200_OK, response.content)

    result = json.loads(response.content)
    self.assertEqual(len(result["results"]), 1)
    self.assertEqual(result["results"][0]["id"], self.job_2.id)
    self.assertEqual(result["results"][0]["status"], "QUEUED")
    self.assertEqual(result["results"][0]["priority"], 123)
def test_successful(self):
    """Tests calling QueueManager.requeue_existing_job() successfully."""

    job = job_test_utils.create_job(job_type=self.job_type, status='FAILED',
                                    error=error_test_utils.create_error(),
                                    data=self.data, num_exes=1)
    old_max_tries = job.max_tries

    job_exe = Queue.objects.requeue_existing_job(job.id)

    # Make sure processor was called.
    # BUG FIX: the original used `assertTrue(mock.process_queued.called_with(...))`;
    # `called_with` is not a Mock assertion method — Mock auto-creates it as a child
    # Mock, whose return value is always truthy, so the check could never fail.
    # Use the real assert_called_with() so a wrong or missing call fails the test.
    self.mock_processor.process_queued.assert_called_with(job_exe, False)

    # Make sure the job attributes were updated (must refresh the model first)
    job = Job.objects.get(pk=job.id)
    self.assertGreater(job.max_tries, old_max_tries)
    self.assertIsNone(job.error)

    # Make sure a job execution was queued
    self.assertTrue(Queue.objects.get(job_exe=job_exe))
def test_failed_normal_job_execution(self):
    """Tests running through a normal job execution that fails"""

    job_exe = JobExecution.objects.get_job_exe_with_job_and_job_type(self._job_exe_id)
    error = error_test_utils.create_error()
    running_job_exe = RunningJobExecution(job_exe)
    # Fresh execution: not finished, first (pre) task ready to start
    self.assertFalse(running_job_exe.is_finished())
    self.assertTrue(running_job_exe.is_next_task_ready())

    # Start pre-task
    task = running_job_exe.start_next_task()
    pre_task_id = task.id
    self.assertFalse(running_job_exe.is_finished())
    self.assertFalse(running_job_exe.is_next_task_ready())

    # Pre-task running
    pre_task_started = now() - timedelta(minutes=5)  # Lots of time so now() called at completion is in future
    running_job_exe.task_start(pre_task_id, pre_task_started)
    self.assertFalse(running_job_exe.is_finished())
    self.assertFalse(running_job_exe.is_next_task_ready())

    # Fail pre-task with exit code 1; this should finish the whole execution
    pre_task_failed = pre_task_started + timedelta(seconds=1)
    pre_task_results = TaskResults(pre_task_id)
    pre_task_results.exit_code = 1
    pre_task_results.when = pre_task_failed
    running_job_exe.task_fail(pre_task_results, error)
    self.assertTrue(running_job_exe.is_finished())
    self.assertFalse(running_job_exe.is_next_task_ready())

    # Re-fetch the persisted execution and verify the failure was recorded
    job_exe = JobExecution.objects.get(id=self._job_exe_id)
    self.assertEqual(pre_task_started, job_exe.pre_started)
    self.assertEqual(pre_task_failed, job_exe.pre_completed)
    self.assertEqual(1, job_exe.pre_exit_code)
    self.assertEqual('FAILED', job_exe.status)
    self.assertEqual(error.id, job_exe.error_id)
    self.assertGreater(job_exe.ended, pre_task_failed)
def test_requeue_failed(self):
    """Tests calling the requeue view successfully for a job that was previously queued."""

    job_test_utils.create_job_exe(job=self.job_2, status="FAILED")
    job_test_utils.create_job_exe(job=self.job_2, status="FAILED")

    # make sure the job is in the right state despite not actually having been run
    Job.objects.update_status([self.job_2], "FAILED", timezone.now(), error_test_utils.create_error())
    self.job_2.num_exes = 2
    self.job_2.save()

    queue_count_before = Queue.objects.count()

    request_body = {"job_ids": [self.job_2.id]}
    url = rest_util.get_url("/queue/requeue-jobs/")
    response = self.client.post(url, json.dumps(request_body), "application/json")
    self.assertEqual(response.status_code, status.HTTP_200_OK, response.content)

    result = json.loads(response.content)
    self.assertEqual(len(result["results"]), 1)
    self.assertEqual(result["results"][0]["id"], self.job_2.id)
    self.assertEqual(result["results"][0]["status"], "QUEUED")

    # Exactly one new queue entry should exist after the requeue
    self.assertEqual(Queue.objects.count() - queue_count_before, 1)
def setUp(self):
    django.setup()

    self.workspace = storage_test_utils.create_workspace()
    self.error = error_test_utils.create_error()

    # Original job interface: single plain-text file input, multi-file PNG output
    interface = {
        'version': '1.0',
        'command': 'my_command',
        'command_arguments': 'args',
        'input_data': [{
            'name': 'Test Input 1',
            'type': 'file',
            'media_types': ['text/plain'],
        }],
        'output_data': [{
            'name': 'Test Output 1',
            'type': 'files',
            'media_type': 'image/png',
        }]}
    self.job_interface = JobInterface(interface)

    # Revised job interface: multi-file image input, single file output
    new_interface = {
        'version': '1.0',
        'command': 'my_command',
        'command_arguments': 'args',
        'input_data': [{
            'name': 'Test Input 2',
            'type': 'files',
            'media_types': ['image/png', 'image/tiff'],
        }],
        'output_data': [{
            'name': 'Test Output 2',
            'type': 'file',
        }]}
    self.new_job_interface = JobInterface(new_interface)

    # Trigger rule configuration that matches plain-text files for 'Test Input 1'
    self.configuration = {
        'version': '1.0',
        'condition': {
            'media_type': 'text/plain'
        },
        'data': {
            'input_data_name': 'Test Input 1',
            'workspace_name': self.workspace.name
        }
    }
    self.trigger_config = job_test_utils.MockTriggerRuleConfiguration(job_test_utils.MOCK_TYPE, self.configuration)

    # Alternate trigger rule configuration that matches JSON files instead
    self.new_configuration = {
        'version': '1.0',
        'condition': {
            'media_type': 'application/json'
        },
        'data': {
            'input_data_name': 'Test Input 1',
            'workspace_name': self.workspace.name
        }
    }
    self.new_trigger_config = job_test_utils.MockTriggerRuleConfiguration(job_test_utils.MOCK_TYPE, self.new_configuration)
def test_calculate_stats(self):
    """Tests calculating individual statistics for a metrics entry."""

    job_type = job_test_utils.create_job_type()

    # First completed job: per-task timings are supplied via a task_results dict;
    # the commented-out keyword arguments below show the equivalent legacy
    # job_exe timestamp fields they replace.
    job1 = job_test_utils.create_job(job_type=job_type, status='COMPLETED',
                                     ended=datetime.datetime(2015, 1, 1, tzinfo=utc))
    task_results_dict = {'version': '1.0',
                         'tasks': [{'task_id': '1', 'type': 'pre', 'was_launched': True,
                                    'started': datetime_to_string(datetime.datetime(2015, 1, 1, 0, 30, 4, tzinfo=utc)),
                                    'ended': datetime_to_string(datetime.datetime(2015, 1, 1, 1, 6, tzinfo=utc))},
                                   {'task_id': '2', 'type': 'main', 'was_launched': True,
                                    'started': datetime_to_string(datetime.datetime(2015, 1, 1, 1, 40, 8, tzinfo=utc)),
                                    'ended': datetime_to_string(datetime.datetime(2015, 1, 1, 2, 30, 10, tzinfo=utc))},
                                   {'task_id': '3', 'type': 'post', 'was_launched': True,
                                    'started': datetime_to_string(datetime.datetime(2015, 1, 1, 3, 30, 12, tzinfo=utc)),
                                    'ended': datetime_to_string(datetime.datetime(2015, 1, 1, 4, 40, 14, tzinfo=utc))}]}
    job_test_utils.create_job_exe(
        job=job1, status=job1.status,
        queued=datetime.datetime(2015, 1, 1, tzinfo=utc),
        started=datetime.datetime(2015, 1, 1, 0, 10, 2, tzinfo=utc),
        # pre_started=datetime.datetime(2015, 1, 1, 0, 30, 4, tzinfo=utc),
        # pre_completed=datetime.datetime(2015, 1, 1, 1, 6, tzinfo=utc),
        # job_started=datetime.datetime(2015, 1, 1, 1, 40, 8, tzinfo=utc),
        # job_completed=datetime.datetime(2015, 1, 1, 2, 30, 10, tzinfo=utc),
        # post_started=datetime.datetime(2015, 1, 1, 3, 30, 12, tzinfo=utc),
        # post_completed=datetime.datetime(2015, 1, 1, 4, 40, 14, tzinfo=utc),
        ended=datetime.datetime(2015, 1, 1, 6, 0, 16, tzinfo=utc),
        task_results=TaskResults(task_results_dict)
    )

    # Second completed job with longer phase durations, to exercise min/max/avg
    job2 = job_test_utils.create_job(job_type=job_type, status='COMPLETED',
                                     ended=datetime.datetime(2015, 1, 1, tzinfo=utc))
    task_results_dict = {'version': '1.0',
                         'tasks': [{'task_id': '1', 'type': 'pre', 'was_launched': True,
                                    'started': datetime_to_string(datetime.datetime(2015, 1, 1, 4, 30, 4, tzinfo=utc)),
                                    'ended': datetime_to_string(datetime.datetime(2015, 1, 1, 6, 0, 8, tzinfo=utc))},
                                   {'task_id': '2', 'type': 'main', 'was_launched': True,
                                    'started': datetime_to_string(datetime.datetime(2015, 1, 1, 8, 40, 14, tzinfo=utc)),
                                    'ended': datetime_to_string(datetime.datetime(2015, 1, 1, 10, 30, 22, tzinfo=utc))},
                                   {'task_id': '3', 'type': 'post', 'was_launched': True,
                                    'started': datetime_to_string(datetime.datetime(2015, 1, 1, 12, 30, 32, tzinfo=utc)),
                                    'ended': datetime_to_string(datetime.datetime(2015, 1, 1, 14, 40, 44, tzinfo=utc))}]}
    job_test_utils.create_job_exe(
        job=job2, status=job2.status,
        queued=datetime.datetime(2015, 1, 1, tzinfo=utc),
        started=datetime.datetime(2015, 1, 1, 2, 10, 2, tzinfo=utc),
        # pre_started=datetime.datetime(2015, 1, 1, 4, 30, 4, tzinfo=utc),
        # pre_completed=datetime.datetime(2015, 1, 1, 6, 0, 8, tzinfo=utc),
        # job_started=datetime.datetime(2015, 1, 1, 8, 40, 14, tzinfo=utc),
        # job_completed=datetime.datetime(2015, 1, 1, 10, 30, 22, tzinfo=utc),
        # post_started=datetime.datetime(2015, 1, 1, 12, 30, 32, tzinfo=utc),
        # post_completed=datetime.datetime(2015, 1, 1, 14, 40, 44, tzinfo=utc),
        ended=datetime.datetime(2015, 1, 1, 16, 0, 58, tzinfo=utc),
        task_results=TaskResults(task_results_dict)
    )

    # One failed job per error category to populate the per-category counts
    sys_error = error_test_utils.create_error(category='SYSTEM')
    job3a = job_test_utils.create_job(job_type=job_type, status='FAILED',
                                      ended=datetime.datetime(2015, 1, 1, tzinfo=utc), error=sys_error)
    job_test_utils.create_job_exe(job=job3a, status=job3a.status, ended=job3a.ended, error=sys_error)

    data_error = error_test_utils.create_error(category='DATA')
    job3b = job_test_utils.create_job(job_type=job_type, status='FAILED',
                                      ended=datetime.datetime(2015, 1, 1, tzinfo=utc), error=data_error)
    job_test_utils.create_job_exe(job=job3b, status=job3b.status, ended=job3b.ended, error=data_error)

    algo_error = error_test_utils.create_error(category='ALGORITHM')
    job3c = job_test_utils.create_job(job_type=job_type, status='FAILED',
                                      ended=datetime.datetime(2015, 1, 1, tzinfo=utc),
                                      error=algo_error)
    job_test_utils.create_job_exe(job=job3c, status=job3c.status, ended=job3c.ended, error=algo_error)

    # One canceled job for the canceled count
    job4 = job_test_utils.create_job(job_type=job_type, status='CANCELED',
                                     ended=datetime.datetime(2015, 1, 1, tzinfo=utc))
    job_test_utils.create_job_exe(job=job4, status=job4.status, ended=job4.ended)

    MetricsJobType.objects.calculate(datetime.date(2015, 1, 1))

    # All six jobs roll up into a single metrics entry for the day
    entries = MetricsJobType.objects.filter(occurred=datetime.date(2015, 1, 1))
    self.assertEqual(len(entries), 1)

    entry = entries.first()
    self.assertEqual(entry.occurred, datetime.date(2015, 1, 1))
    self.assertEqual(entry.completed_count, 2)
    self.assertEqual(entry.failed_count, 3)
    self.assertEqual(entry.canceled_count, 1)
    self.assertEqual(entry.total_count, 6)
    self.assertEqual(entry.error_system_count, 1)
    self.assertEqual(entry.error_data_count, 1)
    self.assertEqual(entry.error_algorithm_count, 1)
    # Timing stats (seconds) derived from the two completed executions above
    self.assertEqual(entry.queue_time_sum, 8404)
    self.assertEqual(entry.queue_time_min, 602)
    self.assertEqual(entry.queue_time_max, 7802)
    self.assertEqual(entry.queue_time_avg, 4202)
    self.assertEqual(entry.pre_time_sum, 7560)
    self.assertEqual(entry.pre_time_min, 2156)
    self.assertEqual(entry.pre_time_max, 5404)
    self.assertEqual(entry.pre_time_avg, 3780)
    self.assertEqual(entry.job_time_sum, 9610)
    self.assertEqual(entry.job_time_min, 3002)
    self.assertEqual(entry.job_time_max, 6608)
    self.assertEqual(entry.job_time_avg, 4805)
    self.assertEqual(entry.post_time_sum, 12014)
    self.assertEqual(entry.post_time_min, 4202)
    self.assertEqual(entry.post_time_max, 7812)
    self.assertEqual(entry.post_time_avg, 6007)
    self.assertEqual(entry.run_time_sum, 70870)
    self.assertEqual(entry.run_time_min, 21014)
    self.assertEqual(entry.run_time_max, 49856)
    self.assertEqual(entry.run_time_avg, 35435)
    self.assertEqual(entry.stage_time_sum, 41686)
    self.assertEqual(entry.stage_time_min, 11654)
    self.assertEqual(entry.stage_time_max, 30032)
    self.assertEqual(entry.stage_time_avg, 20843)
def test_execute(self):
    """Tests calling FailedJobs.execute() successfully"""

    error_1 = error_test_utils.create_error(should_be_retried=True)
    error_2 = error_test_utils.create_error(should_be_retried=False)
    data = JobData()
    job_1 = job_test_utils.create_job(num_exes=1, status='QUEUED', data=data.get_dict(), max_tries=2)
    job_2 = job_test_utils.create_job(num_exes=1, status='RUNNING', data=data.get_dict(), max_tries=2)
    job_3 = job_test_utils.create_job(num_exes=1, status='RUNNING', data=data.get_dict(), max_tries=1)
    job_4 = job_test_utils.create_job(num_exes=1, status='RUNNING', data=data.get_dict(), max_tries=2)
    job_5 = job_test_utils.create_job(num_exes=1, status='RUNNING', data=data.get_dict(), max_tries=2)
    job_6 = job_test_utils.create_job(num_exes=1, status='FAILED', data=data.get_dict(), max_tries=2)
    job_7 = job_test_utils.create_job(num_exes=0, status='CANCELED')
    job_ids = [job_1.id, job_2.id, job_3.id, job_4.id, job_5.id, job_6.id, job_7.id]

    # Jobs 3 and 4 belong to recipes, so failing them must trigger recipe updates
    from recipe.test import utils as recipe_test_utils
    recipe_1 = recipe_test_utils.create_recipe()
    recipe_test_utils.create_recipe_job(recipe=recipe_1, job=job_3)
    recipe_2 = recipe_test_utils.create_recipe()
    recipe_test_utils.create_recipe_job(recipe=recipe_2, job=job_4)

    when_ended = now()

    # Add jobs to message
    message = FailedJobs()
    message.ended = when_ended
    if message.can_fit_more():
        message.add_failed_job(FailedJob(job_1.id, job_1.num_exes, error_1.id))
    if message.can_fit_more():
        message.add_failed_job(FailedJob(job_2.id, job_2.num_exes, error_1.id))
    if message.can_fit_more():
        message.add_failed_job(FailedJob(job_3.id, job_3.num_exes, error_1.id))
    if message.can_fit_more():
        message.add_failed_job(FailedJob(job_4.id, job_4.num_exes, error_2.id))  # Error that cannot be retried
    if message.can_fit_more():
        message.add_failed_job(FailedJob(job_5.id, job_5.num_exes - 1, error_1.id))  # Mismatched exe_num
    if message.can_fit_more():
        message.add_failed_job(FailedJob(job_6.id, job_6.num_exes, error_1.id))
    if message.can_fit_more():
        message.add_failed_job(FailedJob(job_7.id, job_7.num_exes - 1, error_1.id))

    # Execute message
    result = message.execute()
    self.assertTrue(result)

    jobs = Job.objects.filter(id__in=job_ids).order_by('id')
    # Execution should spawn exactly a queued_jobs message (retries) and an
    # update_recipes message (recipe jobs that stayed failed)
    queued_jobs_msg = None
    update_recipes_msg = None
    self.assertEqual(len(message.new_messages), 2)
    for msg in message.new_messages:
        if msg.type == 'queued_jobs':
            queued_jobs_msg = msg
        elif msg.type == 'update_recipes':
            update_recipes_msg = msg
    self.assertEqual(len(queued_jobs_msg._queued_jobs), 2)  # 2 jobs should have been retried
    self.assertEqual(len(update_recipes_msg._recipe_ids), 2)  # 2 jobs should have been failed
    # Job 1 should be retried and put back on the queue
    self.assertEqual(jobs[0].status, 'QUEUED')
    self.assertEqual(jobs[0].num_exes, 1)
    self.assertEqual(queued_jobs_msg._queued_jobs[0].job_id, job_1.id)
    # Job 2 should be retried and put back on the queue
    self.assertEqual(jobs[1].status, 'RUNNING')
    self.assertEqual(jobs[1].num_exes, 1)
    self.assertEqual(queued_jobs_msg._queued_jobs[1].job_id, job_2.id)
    # Job 3 should be failed since max_tries is used up
    self.assertEqual(jobs[2].status, 'FAILED')
    self.assertEqual(jobs[2].num_exes, 1)
    self.assertEqual(jobs[2].error_id, error_1.id)
    self.assertEqual(jobs[2].ended, when_ended)
    self.assertTrue(recipe_1.id in update_recipes_msg._recipe_ids)
    # Job 4 should be failed since error cannot be retried
    self.assertEqual(jobs[3].status, 'FAILED')
    self.assertEqual(jobs[3].num_exes, 1)
    self.assertEqual(jobs[3].error_id, error_2.id)
    self.assertEqual(jobs[3].ended, when_ended)
    self.assertTrue(recipe_2.id in update_recipes_msg._recipe_ids)
    # Job 5 should be ignored since mismatched exe_num
    self.assertEqual(jobs[4].status, 'RUNNING')
    self.assertEqual(jobs[4].num_exes, 1)
    # Job 6 should be ignored since it is already failed
    self.assertEqual(jobs[5].status, 'FAILED')
    self.assertEqual(jobs[5].num_exes, 1)
    # Job 7 should be ignored since it is canceled
    self.assertEqual(jobs[6].status, 'CANCELED')
    self.assertEqual(jobs[6].num_exes, 0)

    # Test executing message again; the outcome must be idempotent
    message_json_dict = message.to_json()
    message = FailedJobs.from_json(message_json_dict)
    result = message.execute()
    self.assertTrue(result)

    jobs = Job.objects.filter(id__in=job_ids).order_by('id')
    # Second run produces only the queued_jobs message (no new recipe updates)
    self.assertEqual(len(message.new_messages), 1)
    queued_jobs_msg = message.new_messages[0]
    self.assertEqual(queued_jobs_msg.type, 'queued_jobs')
    # The same 2 jobs should have been retried
    self.assertEqual(len(queued_jobs_msg._queued_jobs), 2)
    # Job 1 should be retried and put back on the queue
    self.assertEqual(jobs[0].status, 'QUEUED')
    self.assertEqual(jobs[0].num_exes, 1)
    self.assertEqual(queued_jobs_msg._queued_jobs[0].job_id, job_1.id)
    # Job 2 should be retried and put back on the queue
    self.assertEqual(jobs[1].status, 'RUNNING')
    self.assertEqual(jobs[1].num_exes, 1)
    self.assertEqual(queued_jobs_msg._queued_jobs[1].job_id, job_2.id)
    # Job 3 should be failed from first execution
    self.assertEqual(jobs[2].status, 'FAILED')
    self.assertEqual(jobs[2].num_exes, 1)
    self.assertEqual(jobs[2].error_id, error_1.id)
    # Job 4 should be failed from first execution
    self.assertEqual(jobs[3].status, 'FAILED')
    self.assertEqual(jobs[3].num_exes, 1)
    self.assertEqual(jobs[3].error_id, error_2.id)
    # Job 5 should be ignored since mismatched exe_num
    self.assertEqual(jobs[4].status, 'RUNNING')
    self.assertEqual(jobs[4].num_exes, 1)
    # Job 6 should be ignored since it is already failed
    self.assertEqual(jobs[5].status, 'FAILED')
    self.assertEqual(jobs[5].num_exes, 1)
    # Job 7 should be ignored since it is canceled
    self.assertEqual(jobs[6].status, 'CANCELED')
    self.assertEqual(jobs[6].num_exes, 0)
def test_json(self):
    """Tests converting a FailedJobs message to and from JSON"""

    error = error_test_utils.create_error(should_be_retried=True)
    data = JobData()
    job_1 = job_test_utils.create_job(num_exes=1, status='QUEUED', data=data.get_dict(), max_tries=2)
    job_2 = job_test_utils.create_job(num_exes=1, status='RUNNING', data=data.get_dict(), max_tries=1)
    job_3 = job_test_utils.create_job(num_exes=0, status='PENDING')
    job_ids = [job_1.id, job_2.id, job_3.id]

    # Job 2 belongs to a recipe, so failing it should trigger a recipe update
    from recipe.test import utils as recipe_test_utils
    recipe_1 = recipe_test_utils.create_recipe()
    recipe_test_utils.create_recipe_job(recipe=recipe_1, job=job_2)

    when_ended = now()

    # Add jobs to message
    message = FailedJobs()
    message.ended = when_ended
    if message.can_fit_more():
        message.add_failed_job(FailedJob(job_1.id, job_1.num_exes, error.id))
    if message.can_fit_more():
        message.add_failed_job(FailedJob(job_2.id, job_2.num_exes, error.id))
    if message.can_fit_more():
        message.add_failed_job(FailedJob(job_3.id, job_3.num_exes, error.id))

    # Convert message to JSON and back, and then execute
    message_json_dict = message.to_json()
    new_message = FailedJobs.from_json(message_json_dict)
    result = new_message.execute()

    self.assertTrue(result)
    jobs = Job.objects.filter(id__in=job_ids).order_by('id')
    # The round-tripped message should spawn a queued_jobs and an update_recipes message
    queued_jobs_msg = None
    update_recipes_msg = None
    self.assertEqual(len(new_message.new_messages), 2)
    for msg in new_message.new_messages:
        if msg.type == 'queued_jobs':
            queued_jobs_msg = msg
        elif msg.type == 'update_recipes':
            update_recipes_msg = msg
    # Job 1 should be retried and put back on the queue
    self.assertEqual(jobs[0].status, 'QUEUED')
    self.assertEqual(jobs[0].num_exes, 1)
    self.assertEqual(len(queued_jobs_msg._queued_jobs), 1)
    self.assertEqual(queued_jobs_msg._queued_jobs[0].job_id, job_1.id)
    # Job 2 should be failed since max_tries is used up
    self.assertEqual(jobs[1].status, 'FAILED')
    self.assertEqual(jobs[1].num_exes, 1)
    self.assertEqual(jobs[1].error_id, error.id)
    self.assertEqual(jobs[1].ended, when_ended)
    self.assertEqual(len(update_recipes_msg._recipe_ids), 1)
    self.assertTrue(recipe_1.id in update_recipes_msg._recipe_ids)
    # Job 3 should ignore update
    self.assertEqual(jobs[2].status, 'PENDING')
    self.assertEqual(jobs[2].num_exes, 0)
def test_calculate_stats(self):
    """Tests calculating individual statistics for a metrics entry."""

    job_type = job_test_utils.create_job_type()

    # NOTE(review): the create_job() 'ended' values here are naive datetimes while
    # the job_exe timestamps are timezone-aware — presumably intentional for this
    # test fixture, but worth confirming against the model's field settings.
    job1 = job_test_utils.create_job(job_type=job_type, status='COMPLETED', ended=datetime.datetime(2015, 1, 1))
    job_test_utils.create_job_exe(
        job=job1, status=job1.status,
        queued=datetime.datetime(2015, 1, 1, tzinfo=timezone.utc),
        started=datetime.datetime(2015, 1, 1, 0, 10, 2, tzinfo=timezone.utc),
        pre_started=datetime.datetime(2015, 1, 1, 0, 30, 4, tzinfo=timezone.utc),
        pre_completed=datetime.datetime(2015, 1, 1, 1, 6, tzinfo=timezone.utc),
        job_started=datetime.datetime(2015, 1, 1, 1, 40, 8, tzinfo=timezone.utc),
        job_completed=datetime.datetime(2015, 1, 1, 2, 30, 10, tzinfo=timezone.utc),
        post_started=datetime.datetime(2015, 1, 1, 3, 30, 12, tzinfo=timezone.utc),
        post_completed=datetime.datetime(2015, 1, 1, 4, 40, 14, tzinfo=timezone.utc),
        ended=datetime.datetime(2015, 1, 1, 6, 0, 16, tzinfo=timezone.utc),
    )

    # Second completed job with longer phase durations, to exercise min/max/avg
    job2 = job_test_utils.create_job(job_type=job_type, status='COMPLETED', ended=datetime.datetime(2015, 1, 1))
    job_test_utils.create_job_exe(
        job=job2, status=job2.status,
        queued=datetime.datetime(2015, 1, 1, tzinfo=timezone.utc),
        started=datetime.datetime(2015, 1, 1, 2, 10, 2, tzinfo=timezone.utc),
        pre_started=datetime.datetime(2015, 1, 1, 4, 30, 4, tzinfo=timezone.utc),
        pre_completed=datetime.datetime(2015, 1, 1, 6, 0, 8, tzinfo=timezone.utc),
        job_started=datetime.datetime(2015, 1, 1, 8, 40, 14, tzinfo=timezone.utc),
        job_completed=datetime.datetime(2015, 1, 1, 10, 30, 22, tzinfo=timezone.utc),
        post_started=datetime.datetime(2015, 1, 1, 12, 30, 32, tzinfo=timezone.utc),
        post_completed=datetime.datetime(2015, 1, 1, 14, 40, 44, tzinfo=timezone.utc),
        ended=datetime.datetime(2015, 1, 1, 16, 0, 58, tzinfo=timezone.utc),
    )

    # One failed job per error category to populate the per-category counts
    sys_error = error_test_utils.create_error(category='SYSTEM')
    job3a = job_test_utils.create_job(job_type=job_type, status='FAILED', ended=datetime.datetime(2015, 1, 1),
                                      error=sys_error)
    job_test_utils.create_job_exe(job=job3a, status=job3a.status, ended=job3a.ended, error=sys_error)

    data_error = error_test_utils.create_error(category='DATA')
    job3b = job_test_utils.create_job(job_type=job_type, status='FAILED', ended=datetime.datetime(2015, 1, 1),
                                      error=data_error)
    job_test_utils.create_job_exe(job=job3b, status=job3b.status, ended=job3b.ended, error=data_error)

    algo_error = error_test_utils.create_error(category='ALGORITHM')
    job3c = job_test_utils.create_job(job_type=job_type, status='FAILED', ended=datetime.datetime(2015, 1, 1),
                                      error=algo_error)
    job_test_utils.create_job_exe(job=job3c, status=job3c.status, ended=job3c.ended, error=algo_error)

    # One canceled job for the canceled count
    job4 = job_test_utils.create_job(job_type=job_type, status='CANCELED', ended=datetime.datetime(2015, 1, 1))
    job_test_utils.create_job_exe(job=job4, status=job4.status, ended=job4.ended)

    MetricsJobType.objects.calculate(datetime.date(2015, 1, 1))

    # All six jobs roll up into a single metrics entry for the day
    entries = MetricsJobType.objects.filter(occurred=datetime.date(2015, 1, 1))
    self.assertEqual(len(entries), 1)

    entry = entries.first()
    self.assertEqual(entry.occurred, datetime.date(2015, 1, 1))
    self.assertEqual(entry.completed_count, 2)
    self.assertEqual(entry.failed_count, 3)
    self.assertEqual(entry.canceled_count, 1)
    self.assertEqual(entry.total_count, 6)
    self.assertEqual(entry.error_system_count, 1)
    self.assertEqual(entry.error_data_count, 1)
    self.assertEqual(entry.error_algorithm_count, 1)
    # Timing stats (seconds) derived from the two completed executions above
    self.assertEqual(entry.queue_time_sum, 8404)
    self.assertEqual(entry.queue_time_min, 602)
    self.assertEqual(entry.queue_time_max, 7802)
    self.assertEqual(entry.queue_time_avg, 4202)
    self.assertEqual(entry.pre_time_sum, 7560)
    self.assertEqual(entry.pre_time_min, 2156)
    self.assertEqual(entry.pre_time_max, 5404)
    self.assertEqual(entry.pre_time_avg, 3780)
    self.assertEqual(entry.job_time_sum, 9610)
    self.assertEqual(entry.job_time_min, 3002)
    self.assertEqual(entry.job_time_max, 6608)
    self.assertEqual(entry.job_time_avg, 4805)
    self.assertEqual(entry.post_time_sum, 12014)
    self.assertEqual(entry.post_time_min, 4202)
    self.assertEqual(entry.post_time_max, 7812)
    self.assertEqual(entry.post_time_avg, 6007)
    self.assertEqual(entry.run_time_sum, 70870)
    self.assertEqual(entry.run_time_min, 21014)
    self.assertEqual(entry.run_time_max, 49856)
    self.assertEqual(entry.run_time_avg, 35435)
    self.assertEqual(entry.stage_time_sum, 41686)
    self.assertEqual(entry.stage_time_min, 11654)
    self.assertEqual(entry.stage_time_max, 30032)
    self.assertEqual(entry.stage_time_avg, 20843)
def setUp(self):
    django.setup()

    # Errors referenced by name in this class's exit-code mapping dicts:
    # two SYSTEM-category errors and one ALGORITHM-category error
    self.error_1 = error_test_utils.create_error(name=u"unknown", category=u"SYSTEM")
    self.error_2 = error_test_utils.create_error(name=u"database", category=u"SYSTEM")
    self.error_3 = error_test_utils.create_error(name=u"timeout", category=u"ALGORITHM")