# Imports assumed by these tests (module paths follow the Airflow 2.0-era test
# suite this section is taken from; adjust to your version). TEST_DAG_FOLDER,
# DEFAULT_DATE, SETTINGS_FILE_VALID, settings_context, FailureCallbackRequest,
# DagFileProcessorProcess and FakeDagFileProcessorRunner are provided elsewhere
# in this test module and its helpers.
import os
from datetime import timedelta

from airflow.configuration import conf
from airflow.jobs.local_task_job import LocalTaskJob as LJ
from airflow.models import DagBag, DagModel, TaskInstance as TI
from airflow.models.serialized_dag import SerializedDagModel
from airflow.models.taskinstance import SimpleTaskInstance
from airflow.utils.dag_processing import DagFileProcessorAgent
from airflow.utils.session import create_session
from airflow.utils.state import State
from tests.test_utils.config import conf_vars
from tests.test_utils.db import clear_db_dags, clear_db_serialized_dags


def test_parse_once(self):
    clear_db_serialized_dags()
    clear_db_dags()

    test_dag_path = os.path.join(TEST_DAG_FOLDER, 'test_scheduler_dags.py')
    async_mode = 'sqlite' not in conf.get('core', 'sql_alchemy_conn')
    processor_agent = DagFileProcessorAgent(
        test_dag_path, 1, type(self)._processor_factory, timedelta.max, [], False, async_mode
    )
    processor_agent.start()
    if not async_mode:
        processor_agent.run_single_parsing_loop()
    while not processor_agent.done:
        if not async_mode:
            processor_agent.wait_until_finished()
        processor_agent.heartbeat()

    assert processor_agent.all_files_processed
    assert processor_agent.done

    with create_session() as session:
        dag_ids = session.query(DagModel.dag_id).order_by("dag_id").all()
        assert dag_ids == [('test_start_date_scheduling',), ('test_task_start_date_scheduling',)]

        dag_ids = session.query(SerializedDagModel.dag_id).order_by("dag_id").all()
        assert dag_ids == [('test_start_date_scheduling',), ('test_task_start_date_scheduling',)]
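
# The test above references a `_processor_factory` classmethod defined elsewhere
# on this test class. A minimal sketch of what it presumably looks like, modeled
# on the local factories in the tests below plus the extra dag_ids/pickle_dags
# arguments this Airflow version threads through the agent (signature assumed):
#
#     @classmethod
#     def _processor_factory(cls, file_path, zombies, dag_ids, pickle_dags):
#         return DagFileProcessorProcess(file_path, pickle_dags, dag_ids, zombies)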
def test_reload_module(self):
    """
    Configure the context so that logging.logging_config_class points at a fake
    logging config; when the logging module is then reloaded, the
    airflow.processor_manager logger should not be configured.
    """
    with settings_context(SETTINGS_FILE_VALID):
        # Launch a process through DagFileProcessorAgent, which will try to
        # reload the logging module.
        test_dag_path = os.path.join(TEST_DAG_FOLDER, 'test_scheduler_dags.py')
        async_mode = 'sqlite' not in conf.get('core', 'sql_alchemy_conn')

        log_file_loc = conf.get('logging', 'DAG_PROCESSOR_MANAGER_LOG_LOCATION')
        try:
            os.remove(log_file_loc)
        except OSError:
            pass

        # Starting dag processing with 0 max_runs to avoid redundant operations.
        processor_agent = DagFileProcessorAgent(
            test_dag_path, 0, type(self)._processor_factory, timedelta.max, [], False, async_mode
        )
        processor_agent.start()
        if not async_mode:
            processor_agent.run_single_parsing_loop()

        processor_agent._process.join()
        # Since we are reloading the logging config rather than creating this
        # file, we expect it not to exist.
        assert not os.path.isfile(log_file_loc)
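
# `settings_context(SETTINGS_FILE_VALID)` is a helper defined elsewhere in this
# module: it is assumed to write the given settings source out as a temporary
# module and point the logging_config_class option at it for the duration of the
# block. A valid-but-minimal settings body could be built from Airflow's default
# config (sketch; the real SETTINGS_FILE_VALID may differ):
#
#     SETTINGS_FILE_VALID = """
#     from copy import deepcopy
#     from airflow.config_templates.airflow_local_settings import DEFAULT_LOGGING_CONFIG
#
#     LOGGING_CONFIG = deepcopy(DEFAULT_LOGGING_CONFIG)
#     """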
def test_launch_process(self):
    def processor_factory(file_path, zombies):
        return DagFileProcessorProcess(file_path, False, [], zombies)

    test_dag_path = os.path.join(TEST_DAG_FOLDER, 'test_scheduler_dags.py')
    async_mode = 'sqlite' not in conf.get('core', 'sql_alchemy_conn')

    log_file_loc = conf.get('logging', 'DAG_PROCESSOR_MANAGER_LOG_LOCATION')
    try:
        os.remove(log_file_loc)
    except OSError:
        pass

    # Starting dag processing with 0 max_runs to avoid redundant operations.
    processor_agent = DagFileProcessorAgent(test_dag_path, 0, processor_factory, timedelta.max, async_mode)
    processor_agent.start()
    if not async_mode:
        processor_agent.run_single_parsing_loop()

    processor_agent._process.join()

    self.assertTrue(os.path.isfile(log_file_loc))
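
# For reference, the positional arguments passed to DagFileProcessorAgent above
# map to roughly this signature (parameter names assumed for this Airflow
# version; the tests earlier in this section use an extended seven-argument form
# that also threads through dag_ids and pickle_dags):
#
#     DagFileProcessorAgent(
#         dag_directory=test_dag_path,       # file or folder to parse
#         max_runs=0,                        # 0 = exit after start-up, no full parse loops
#         processor_factory=processor_factory,
#         processor_timeout=timedelta.max,   # never time a processor out
#         async_mode=async_mode,             # sync mode when the backing DB is sqlite
#     )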
def test_parse_once(self):
    def processor_factory(file_path, zombies):
        return DagFileProcessorProcess(file_path, False, [], zombies)

    test_dag_path = os.path.join(TEST_DAG_FOLDER, 'test_scheduler_dags.py')
    async_mode = 'sqlite' not in conf.get('core', 'sql_alchemy_conn')
    processor_agent = DagFileProcessorAgent(test_dag_path, 1, processor_factory, timedelta.max, async_mode)
    processor_agent.start()
    parsing_result = []
    if not async_mode:
        processor_agent.run_single_parsing_loop()
    while not processor_agent.done:
        if not async_mode:
            processor_agent.wait_until_finished()
        parsing_result.extend(processor_agent.harvest_simple_dags())

    dag_ids = [result.dag_id for result in parsing_result]
    self.assertEqual(dag_ids.count('test_start_date_scheduling'), 1)
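
# `harvest_simple_dags()` drains whatever results the manager process has
# produced since the last call, which is why the loop above keeps extending
# `parsing_result` until the agent reports `done`. A consumer sketch (SimpleDag
# attribute names per the 1.10-era API; assumed):
#
#     for simple_dag in processor_agent.harvest_simple_dags():
#         print(simple_dag.dag_id, simple_dag.task_ids)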
def test_handle_failure_callback_with_zombies_are_correctly_passed_to_dag_file_processor(self):
    """
    Check that the same set of failure callback requests (zombies) is passed to
    the dag file processors until the next zombie detection logic is invoked.
    """
    test_dag_path = os.path.join(TEST_DAG_FOLDER, 'test_example_bash_operator.py')
    with conf_vars({
        ('scheduler', 'max_threads'): '1',
        ('core', 'load_examples'): 'False'
    }):
        dagbag = DagBag(test_dag_path)
        with create_session() as session:
            session.query(LJ).delete()
            dag = dagbag.get_dag('test_example_bash_operator')
            dag.sync_to_db()
            task = dag.get_task(task_id='run_this_last')

            ti = TI(task, DEFAULT_DATE, State.RUNNING)
            local_job = LJ(ti)
            local_job.state = State.SHUTDOWN
            local_job.id = 1
            ti.job_id = local_job.id

            session.add(local_job)
            session.add(ti)
            session.commit()
            fake_failure_callback_requests = [
                FailureCallbackRequest(
                    full_filepath=dag.full_filepath,
                    simple_task_instance=SimpleTaskInstance(ti),
                    msg="Message",
                )
            ]

        async_mode = 'sqlite' not in conf.get('core', 'sql_alchemy_conn')
        processor_agent = DagFileProcessorAgent(
            test_dag_path,
            1,
            FakeDagFileProcessorRunner._fake_dag_processor_factory,
            timedelta.max,
            [],
            False,
            async_mode,
        )
        processor_agent.start()
        parsing_result = []
        if not async_mode:
            processor_agent.run_single_parsing_loop()
        while not processor_agent.done:
            if not async_mode:
                processor_agent.wait_until_finished()
            parsing_result.extend(processor_agent.harvest_simple_dags())

        self.assertEqual(len(fake_failure_callback_requests), len(parsing_result))
        self.assertEqual(
            set(zombie.simple_task_instance.key for zombie in fake_failure_callback_requests),
            set(result.simple_task_instance.key for result in parsing_result),
        )
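
# `FakeDagFileProcessorRunner` is defined elsewhere in this module. The
# assertions above only hold because the fake skips real parsing and echoes the
# failure callback requests it was constructed with back as its result, so the
# agent "harvests" exactly what was passed in. A minimal sketch (names and
# signature assumed from the call sites in this test):
#
#     class FakeDagFileProcessorRunner(DagFileProcessorProcess):
#         def __init__(self, file_path, pickle_dags, dag_ids, callback_requests):
#             super().__init__(file_path, pickle_dags, dag_ids, callback_requests)
#             self._callback_requests = callback_requests
#
#         @staticmethod
#         def _fake_dag_processor_factory(file_path, callback_requests, dag_ids, pickle_dags):
#             return FakeDagFileProcessorRunner(file_path, pickle_dags, dag_ids, callback_requests)
#
#         @property
#         def result(self):
#             # hand the callback requests back so harvest_simple_dags() returns them
#             return self._callback_requests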