def test_cleanup_params_defaults():
    inst = Instance(hostname='foobar')
    assert inst.get_cleanup_task_kwargs(exclude_strings=['awx_423_']) == {
        'exclude_strings': ['awx_423_'],
        'file_pattern': '/tmp/awx_*_*',
        'grace_period': 60,
    }

def test_capacity_adjustment_no_save(capacity_adjustment):
    inst = Instance(hostname='test-host', capacity_adjustment=Decimal(capacity_adjustment), capacity=0, cpu_capacity=10, mem_capacity=1000)
    assert inst.capacity == 0
    assert inst.capacity_adjustment == capacity_adjustment  # sanity
    inst.set_capacity_value()
    assert inst.capacity > 0
    assert inst.capacity == (float(inst.capacity_adjustment) * abs(inst.mem_capacity - inst.cpu_capacity) + min(inst.mem_capacity, inst.cpu_capacity))

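# A worked example of the capacity formula asserted above (the numbers are
# illustrative, not from the source): with capacity_adjustment=0.5,
# cpu_capacity=10, and mem_capacity=1000, the expected value is
# 0.5 * abs(1000 - 10) + min(1000, 10) == 505.0. An adjustment of 0 pins
# capacity to the smaller of the two measures; an adjustment of 1 pins it
# to the larger.
def _example_capacity_value(adjustment, cpu_capacity, mem_capacity):
    # hypothetical helper mirroring what set_capacity_value() is asserted to compute
    return float(adjustment) * abs(mem_capacity - cpu_capacity) + min(mem_capacity, cpu_capacity)


assert _example_capacity_value(0.5, 10, 1000) == 505.0
assert _example_capacity_value(0, 10, 1000) == 10
assert _example_capacity_value(1, 10, 1000) == 1000
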
def test_workflow_does_not_reap(self):
    i = Instance(hostname='awx')
    i.save()
    j = WorkflowJob(status='running', execution_node='awx')
    j.save()
    reaper.reap(i)
    assert WorkflowJob.objects.first().status == 'running'

def test_cleanup_params_for_image_cleanup():
    inst = Instance(hostname='foobar')
    # see CLI conversion in awx.main.tests.unit.utils.test_receptor
    assert inst.get_cleanup_task_kwargs(file_pattern='', remove_images=['quay.invalid/foo/bar'], image_prune=True) == {
        'file_pattern': '',
        'process_isolation_executable': 'podman',
        'remove_images': ['quay.invalid/foo/bar'],
        'image_prune': True,
        'grace_period': 60,
    }

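# Read together, the two cleanup tests imply that get_cleanup_task_kwargs()
# layers caller overrides onto defaults (file_pattern='/tmp/awx_*_*',
# grace_period=60) and adds a container runtime only when image cleanup is
# requested. A minimal sketch of that behavior, assuming simple dict merging
# (an illustration consistent with the tests, not the actual AWX implementation):
def _example_cleanup_task_kwargs(**overrides):
    kwargs = {'file_pattern': '/tmp/awx_*_*', 'grace_period': 60}
    if overrides.get('remove_images') or overrides.get('image_prune'):
        # image cleanup is delegated to a container runtime
        kwargs['process_isolation_executable'] = 'podman'
    kwargs.update(overrides)
    return kwargs
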
def _register_hostname(self, hostname):
    if not hostname:
        return
    with advisory_lock('instance_registration_%s' % hostname):
        instance = Instance.objects.filter(hostname=hostname)
        if instance.exists():
            print("Instance already registered {}".format(instance[0].hostname))
            return
        instance = Instance(uuid=self.uuid, hostname=hostname)
        instance.save()
    print('Successfully registered instance {}'.format(hostname))
    self.changed = True

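# The advisory lock above serializes registration of a given hostname across
# processes and cluster nodes, so two concurrent callers cannot both pass the
# exists() check and insert duplicate rows. A sketch of the same
# lock-then-check-then-create pattern (hypothetical helper; advisory_lock is
# assumed to be in scope, as in the surrounding module):
def _example_register_once(lock_name, model, **lookup):
    with advisory_lock(lock_name):
        if model.objects.filter(**lookup).exists():
            return False  # another process won the race; nothing to do
        model.objects.create(**lookup)
        return True
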
def test_do_not_reap_excluded_uuids(self, excluded_uuids, fail):
    i = Instance(hostname='awx')
    i.save()
    j = Job(
        status='running',
        execution_node='awx',
        controller_node='',
        start_args='SENSITIVE',
        celery_task_id='abc123',
    )
    j.save()

    # if the UUID is excluded, don't reap it
    reaper.reap(i, excluded_uuids=excluded_uuids)
    job = Job.objects.first()
    if fail:
        assert job.status == 'failed'
        assert 'marked as failed' in job.job_explanation
        assert job.start_args == ''
    else:
        assert job.status == 'running'

def test_should_reap(self, status, fail, execution_node, controller_node, modified):
    i = Instance(hostname='awx')
    i.save()
    j = Job(
        status=status,
        execution_node=execution_node,
        controller_node=controller_node,
        start_args='SENSITIVE',
    )
    j.save()
    if modified:
        # we have to edit the modification time _without_ calling save()
        # (because .save() overwrites it to _now_)
        Job.objects.filter(id=j.id).update(modified=modified)
    reaper.reap(i)
    job = Job.objects.first()
    if fail:
        assert job.status == 'failed'
        assert 'marked as failed' in job.job_explanation
        assert job.start_args == ''
    else:
        assert job.status == status

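# Taken together, the three reaper tests imply the decision rule: only Jobs
# (not WorkflowJobs) that are in flight on the given instance, and whose
# celery task UUID is not excluded, get marked failed with start_args
# scrubbed. A condensed, hypothetical sketch of that rule inferred from the
# tests (the real logic lives in awx.main.dispatch.reaper and may differ):
def _example_should_reap(job, me_hostname, excluded_uuids=()):
    if job.celery_task_id in excluded_uuids:
        return False
    return job.status in ('running', 'waiting') and me_hostname in (job.execution_node, job.controller_node)
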
def start_task(self, task, rampart_group, dependent_tasks=None, instance=None):
    self.start_task_limit -= 1
    if self.start_task_limit == 0:
        # schedule another run immediately after this task manager
        schedule_task_manager()
    from awx.main.tasks import handle_work_error, handle_work_success

    dependent_tasks = dependent_tasks or []

    task_actual = {
        'type': get_type_for_model(type(task)),
        'id': task.id,
    }
    dependencies = [{'type': get_type_for_model(type(t)), 'id': t.id} for t in dependent_tasks]

    task.status = 'waiting'

    (start_status, opts) = task.pre_start()
    if not start_status:
        task.status = 'failed'
        if task.job_explanation:
            task.job_explanation += ' '
        task.job_explanation += 'Task failed pre-start check.'
        task.save()
        # TODO: run error handler to fail sub-tasks and send notifications
    else:
        if type(task) is WorkflowJob:
            task.status = 'running'
            task.send_notification_templates('running')
            logger.debug('Transitioning %s to running status.', task.log_format)
            schedule_task_manager()
        elif rampart_group.is_container_group:
            task.instance_group = rampart_group
            if task.capacity_type == 'execution':
                # find one real, non-containerized instance with capacity to
                # act as the controller for k8s API interaction
                try:
                    task.controller_node = Instance.choose_online_control_plane_node()
                    task.log_lifecycle("controller_node_chosen")
                except IndexError:
                    logger.warning("No control plane nodes available to run containerized job {}".format(task.log_format))
                    return
            else:
                # project updates and system jobs don't *actually* run in pods, so
                # just pick *any* non-containerized host and use it as the execution node
                task.execution_node = Instance.choose_online_control_plane_node()
                task.log_lifecycle("execution_node_chosen")
                logger.debug('Submitting containerized {} to queue {}.'.format(task.log_format, task.execution_node))
        else:
            task.instance_group = rampart_group
            task.execution_node = instance.hostname
            task.log_lifecycle("execution_node_chosen")
            if instance.node_type == 'execution':
                try:
                    task.controller_node = Instance.choose_online_control_plane_node()
                    task.log_lifecycle("controller_node_chosen")
                except IndexError:
                    logger.warning("No control plane nodes available to manage {}".format(task.log_format))
                    return
            else:
                # control plane nodes will manage jobs locally for performance and resilience
                task.controller_node = task.execution_node
                task.log_lifecycle("controller_node_chosen")
            logger.debug('Submitting job {} to queue {} controlled by {}.'.format(task.log_format, task.execution_node, task.controller_node))

    with disable_activity_stream():
        task.celery_task_id = str(uuid.uuid4())
        task.save()
        task.log_lifecycle("waiting")

    if rampart_group is not None:
        self.consume_capacity(task, rampart_group.name, instance=instance)

    def post_commit():
        if task.status != 'failed' and type(task) is not WorkflowJob:
            # Before task is dispatched, ensure that job_event partitions exist
            create_partition(task.event_class._meta.db_table, start=task.created)
            task_cls = task._get_task_class()
            task_cls.apply_async(
                [task.pk],
                opts,
                queue=task.get_queue_name(),
                uuid=task.celery_task_id,
                callbacks=[{'task': handle_work_success.name, 'kwargs': {'task_actual': task_actual}}],
                errbacks=[{'task': handle_work_error.name, 'args': [task.celery_task_id], 'kwargs': {'subtasks': [task_actual] + dependencies}}],
            )

    task.websocket_emit_status(task.status)  # adds to on_commit
    connection.on_commit(post_commit)

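# start_task defers actual dispatch through connection.on_commit, so a worker
# can only pick up the task after its row (and its event-table partition) has
# been committed; if the transaction rolls back, the callback is simply
# dropped. A minimal, self-contained sketch of that pattern (the names are
# illustrative, not AWX's):
from django.db import transaction


def _example_dispatch_after_commit(job_pk, publish):
    with transaction.atomic():
        # ... save the job row inside the transaction here ...
        # publish() fires only after COMMIT succeeds; a rollback discards it
        transaction.on_commit(lambda: publish(job_pk))
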
def mock_me():
    me_mock = mock.MagicMock(return_value=Instance(id=1, hostname=settings.CLUSTER_HOST_ID, uuid='00000000-0000-0000-0000-000000000000'))
    with mock.patch.object(Instance.objects, 'me', me_mock):
        yield

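# Because the function yields inside the patch context, it behaves as a
# generator-style pytest fixture: Instance.objects.me is mocked for the
# duration of the test and restored on teardown. Illustrative usage (assuming
# mock_me is registered as a fixture, e.g. in conftest.py; this test is
# hypothetical):
def test_me_returns_cluster_host(mock_me):
    assert Instance.objects.me().hostname == settings.CLUSTER_HOST_ID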