def test_zombies_are_correctly_passed_to_dag_file_processor(self):
    """
    Check that the same set of zombies are passed to the dag
    file processors until the next zombie detection logic is invoked.
    """
    with conf_vars({('scheduler', 'max_threads'): '1',
                    ('core', 'load_examples'): 'False'}):
        dagbag = DagBag(os.path.join(TEST_DAG_FOLDER, 'test_example_bash_operator.py'))
        with create_session() as session:
            session.query(LJ).delete()
            dag = dagbag.get_dag('test_example_bash_operator')
            task = dag.get_task(task_id='run_this_last')

            ti = TI(task, DEFAULT_DATE, State.RUNNING)
            local_job = LJ(ti)
            local_job.state = State.SHUTDOWN
            local_job.id = 1
            ti.job_id = local_job.id

            session.add(local_job)
            session.add(ti)
            session.commit()
            fake_zombies = [SimpleTaskInstance(ti)]

        class FakeDagFileProcessorRunner(DagFileProcessorProcess):
            # This fake processor will return the zombies it received in constructor
            # as its processing result w/o actually parsing anything.
            def __init__(self, file_path, pickle_dags, dag_id_white_list, zombies):
                super().__init__(file_path, pickle_dags, dag_id_white_list, zombies)
                self._result = zombies, 0

            def start(self):
                pass

            @property
            def start_time(self):
                return DEFAULT_DATE

            @property
            def pid(self):
                return 1234

            @property
            def done(self):
                return True

            @property
            def result(self):
                return self._result

        def processor_factory(file_path, zombies):
            return FakeDagFileProcessorRunner(file_path, False, [], zombies)

        test_dag_path = os.path.join(TEST_DAG_FOLDER, 'test_example_bash_operator.py')
        async_mode = 'sqlite' not in conf.get('core', 'sql_alchemy_conn')
        processor_agent = DagFileProcessorAgent(test_dag_path, [], 1, processor_factory,
                                                timedelta.max, async_mode)
        processor_agent.start()
        parsing_result = []
        if not async_mode:
            processor_agent.heartbeat()
        while not processor_agent.done:
            if not async_mode:
                processor_agent.wait_until_finished()
            parsing_result.extend(processor_agent.harvest_simple_dags())

        self.assertEqual(len(fake_zombies), len(parsing_result))
        self.assertEqual(set(zombie.key for zombie in fake_zombies),
                         set(result.key for result in parsing_result))
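# The two variants below rely on a module-level FakeDagFileProcessorRunner that
# exposes a _fake_dag_processor_factory hook, which this excerpt does not show.
# A minimal sketch under the assumption that it mirrors the local fake above;
# the factory's parameter order is an assumption, not confirmed by this excerpt:
class FakeDagFileProcessorRunner(DagFileProcessorProcess):
    def __init__(self, file_path, pickle_dags, dag_ids, callback_requests):
        super().__init__(file_path, pickle_dags, dag_ids, callback_requests)
        # Pre-compute the "parsing result" so no subprocess ever has to run.
        self._result = callback_requests, 0

    def start(self):
        pass  # deliberately never forks a real parser process

    @property
    def done(self):
        return True

    @property
    def result(self):
        return self._result

    @staticmethod
    def _fake_dag_processor_factory(file_path, callback_requests, dag_ids, pickle_dags):
        return FakeDagFileProcessorRunner(file_path, pickle_dags, dag_ids, callback_requests)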
def test_handle_failure_callback_with_zombies_are_correctly_passed_to_dag_file_processor(self):
    """
    Check that the same set of failure callback requests for zombies is passed
    to the dag file processors until the next zombie detection logic is invoked.
    """
    test_dag_path = os.path.join(TEST_DAG_FOLDER, 'test_example_bash_operator.py')
    with conf_vars({('scheduler', 'parsing_processes'): '1',
                    ('core', 'load_examples'): 'False'}):
        dagbag = DagBag(test_dag_path, read_dags_from_db=False)
        with create_session() as session:
            session.query(LJ).delete()
            dag = dagbag.get_dag('test_example_bash_operator')
            dag.sync_to_db()
            task = dag.get_task(task_id='run_this_last')

            ti = TI(task, DEFAULT_DATE, State.RUNNING)
            local_job = LJ(ti)
            local_job.state = State.SHUTDOWN
            session.add(local_job)
            session.commit()

            # TODO: If there was an actual Relationship between TI and Job
            # we wouldn't need this extra commit
            session.add(ti)
            ti.job_id = local_job.id
            session.commit()

            expected_failure_callback_requests = [
                TaskCallbackRequest(
                    full_filepath=dag.full_filepath,
                    simple_task_instance=SimpleTaskInstance(ti),
                    msg="Message",
                )
            ]

        test_dag_path = os.path.join(TEST_DAG_FOLDER, 'test_example_bash_operator.py')
        child_pipe, parent_pipe = multiprocessing.Pipe()
        async_mode = 'sqlite' not in conf.get('core', 'sql_alchemy_conn')

        fake_processors = []

        def fake_processor_factory(*args, **kwargs):
            nonlocal fake_processors
            processor = FakeDagFileProcessorRunner._fake_dag_processor_factory(*args, **kwargs)
            fake_processors.append(processor)
            return processor

        manager = DagFileProcessorManager(
            dag_directory=test_dag_path,
            max_runs=1,
            processor_factory=fake_processor_factory,
            processor_timeout=timedelta.max,
            signal_conn=child_pipe,
            dag_ids=[],
            pickle_dags=False,
            async_mode=async_mode,
        )

        self.run_processor_manager_one_loop(manager, parent_pipe)

        if async_mode:
            # Once for initial parse, and then again for the add_callback_to_queue
            assert len(fake_processors) == 2
            assert fake_processors[0]._file_path == test_dag_path
            assert fake_processors[0]._callback_requests == []
        else:
            assert len(fake_processors) == 1

        assert fake_processors[-1]._file_path == test_dag_path
        callback_requests = fake_processors[-1]._callback_requests
        assert {
            zombie.simple_task_instance.key for zombie in expected_failure_callback_requests
        } == {
            result.simple_task_instance.key for result in callback_requests
        }

        child_pipe.close()
        parent_pipe.close()
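# Both callback-request variants call self.run_processor_manager_one_loop, a test
# helper not shown in this excerpt. A plausible sketch of it, assuming the
# DagParsingSignal / DagParsingStat types from airflow.utils.dag_processing and
# private manager internals (_async_mode, _run_parsing_loop), all of which are
# assumptions here:
def run_processor_manager_one_loop(self, manager, parent_pipe):
    if not manager._async_mode:
        parent_pipe.send(DagParsingSignal.AGENT_RUN_ONCE)

    results = []
    while True:
        manager._run_parsing_loop()
        # Drain everything the manager sent back over the pipe: a DagParsingStat
        # signals completion, anything else is a harvested parsing result.
        while parent_pipe.poll(timeout=0.01):
            obj = parent_pipe.recv()
            if not isinstance(obj, DagParsingStat):
                results.append(obj)
            elif obj.done:
                return results
        raise RuntimeError("Manager did not finish, but there is nothing left to read")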
def test_handle_failure_callback_with_zombies_are_correctly_passed_to_dag_file_processor(self):
    """
    Check that the same set of failure callback requests for zombies is passed
    to the dag file processors until the next zombie detection logic is invoked.
    """
    test_dag_path = os.path.join(TEST_DAG_FOLDER, 'test_example_bash_operator.py')
    with conf_vars({('scheduler', 'max_threads'): '1',
                    ('core', 'load_examples'): 'False'}):
        dagbag = DagBag(test_dag_path)
        with create_session() as session:
            session.query(LJ).delete()
            dag = dagbag.get_dag('test_example_bash_operator')
            dag.sync_to_db()
            task = dag.get_task(task_id='run_this_last')

            ti = TI(task, DEFAULT_DATE, State.RUNNING)
            local_job = LJ(ti)
            local_job.state = State.SHUTDOWN
            session.add(local_job)
            session.commit()

            # TODO: If there was an actual Relationship between TI and Job
            # we wouldn't need this extra commit
            session.add(ti)
            ti.job_id = local_job.id
            session.commit()

            fake_failure_callback_requests = [
                FailureCallbackRequest(
                    full_filepath=dag.full_filepath,
                    simple_task_instance=SimpleTaskInstance(ti),
                    msg="Message",
                )
            ]

        test_dag_path = os.path.join(TEST_DAG_FOLDER, 'test_example_bash_operator.py')
        child_pipe, parent_pipe = multiprocessing.Pipe()
        async_mode = 'sqlite' not in conf.get('core', 'sql_alchemy_conn')

        manager = DagFileProcessorManager(
            dag_directory=test_dag_path,
            max_runs=1,
            processor_factory=FakeDagFileProcessorRunner._fake_dag_processor_factory,
            processor_timeout=timedelta.max,
            signal_conn=child_pipe,
            dag_ids=[],
            pickle_dags=False,
            async_mode=async_mode,
        )

        parsing_result = self.run_processor_manager_one_loop(manager, parent_pipe)

        self.assertEqual(len(fake_failure_callback_requests), len(parsing_result))
        self.assertEqual(
            set(zombie.simple_task_instance.key for zombie in fake_failure_callback_requests),
            set(result.simple_task_instance.key for result in parsing_result),
        )

        child_pipe.close()
        parent_pipe.close()
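# For completeness: a plausible import block these tests assume (the LJ and TI
# aliases appear throughout, and DEFAULT_DATE / TEST_DAG_FOLDER are test-module
# constants). Module paths moved between Airflow releases, so the exact
# locations below are assumptions:
import multiprocessing
import os
from datetime import timedelta

from airflow.configuration import conf
from airflow.jobs.local_task_job import LocalTaskJob as LJ
from airflow.models import DagBag
from airflow.models.taskinstance import SimpleTaskInstance, TaskInstance as TI
from airflow.utils.dag_processing import (
    DagFileProcessorAgent,
    DagFileProcessorManager,
    DagFileProcessorProcess,
)
from airflow.utils.session import create_session
from airflow.utils.state import State
from tests.test_utils.config import conf_vars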