def pool(args):
    session = settings.Session()
    if args.get or (args.set and args.set[0]) or args.delete:
        name = args.get or args.delete or args.set[0]
        pool = (
            session.query(Pool)
            .filter(Pool.pool == name)
            .first())
        if pool and args.get:
            print("{} ".format(pool))
            return
        elif not pool and (args.get or args.delete):
            print("No pool named {} found".format(name))
        elif not pool and args.set:
            pool = Pool(
                pool=name,
                slots=args.set[1],
                description=args.set[2])
            session.add(pool)
            session.commit()
            print("{} ".format(pool))
        elif pool and args.set:
            pool.slots = args.set[1]
            pool.description = args.set[2]
            session.commit()
            print("{} ".format(pool))
            return
        elif pool and args.delete:
            session.query(Pool).filter_by(pool=args.delete).delete()
            session.commit()
            print("Pool {} deleted".format(name))
def pool(args):
    session = settings.Session()
    if args.get or (args.set and args.set[0]) or args.delete:
        name = args.get or args.delete or args.set[0]
        pool = (session.query(Pool).filter(Pool.pool == name).first())
        if pool and args.get:
            print("{} ".format(pool))
            return
        elif not pool and (args.get or args.delete):
            print("No pool named {} found".format(name))
        elif not pool and args.set:
            pool = Pool(pool=name, slots=args.set[1], description=args.set[2])
            session.add(pool)
            session.commit()
            print("{} ".format(pool))
        elif pool and args.set:
            pool.slots = args.set[1]
            pool.description = args.set[2]
            session.commit()
            print("{} ".format(pool))
            return
        elif pool and args.delete:
            session.query(Pool).filter_by(pool=args.delete).delete()
            session.commit()
            print("Pool {} deleted".format(name))
def create_pool(name, slots, description, session=None):
    """Create a pool with given parameters."""
    if not (name and name.strip()):
        raise AirflowBadRequest("Pool name shouldn't be empty")
    try:
        slots = int(slots)
    except ValueError:
        raise AirflowBadRequest(f"Bad value for `slots`: {slots}")

    # Get the length of the pool column
    pool_name_length = Pool.pool.property.columns[0].type.length
    if len(name) > pool_name_length:
        raise AirflowBadRequest(
            f"Pool name can't be more than {pool_name_length} characters")

    session.expire_on_commit = False
    pool = session.query(Pool).filter_by(pool=name).first()
    if pool is None:
        pool = Pool(pool=name, slots=slots, description=description)
        session.add(pool)
    else:
        pool.slots = slots
        pool.description = description
    session.commit()
    return pool
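A minimal usage sketch for the helper above, assuming it is wrapped with Airflow's @provide_session decorator (not shown in the excerpt) so the session argument is injected; the pool name and sizing below are made-up illustration values:

# Hypothetical call; "etl_pool" and slots=4 are illustrative, not from the source.
pool = create_pool(name="etl_pool", slots=4, description="Slots for ETL tasks")
print(pool.pool, pool.slots)  # the helper upserts, so re-running updates the existing row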
def add_default_pool_if_not_exists(session=None):
    """Add default pool if it does not exist."""
    if not Pool.get_pool(Pool.DEFAULT_POOL_NAME, session=session):
        default_pool = Pool(
            pool=Pool.DEFAULT_POOL_NAME,
            slots=conf.getint(section='core', key='default_pool_task_slot_count'),
            description="Default pool",
        )
        session.add(default_pool)
        session.commit()
def _setup_attrs(self, _setup_attrs_base):
    clear_db_pools()
    self.pools = [Pool.get_default_pool()]
    for i in range(self.USER_POOL_COUNT):
        name = f'experimental_{i + 1}'
        pool = Pool(
            pool=name,
            slots=i,
            description=name,
        )
        self.session.add(pool)
        self.pools.append(pool)
    self.session.commit()
    self.pool = self.pools[-1]
def setUp(self):
    super().setUp()
    clear_db_pools()
    self.pools = [Pool.get_default_pool()]
    for i in range(self.USER_POOL_COUNT):
        name = 'experimental_%s' % (i + 1)
        pool = Pool(
            pool=name,
            slots=i,
            description=name,
        )
        self.session.add(pool)
        self.pools.append(pool)
    self.session.commit()
    self.pool = self.pools[-1]
def test_scheduler_pooled_tasks(self):
    """
    Test that the scheduler handles queued tasks correctly
    See issue #1299
    """
    session = settings.Session()
    if not (session.query(Pool)
            .filter(Pool.pool == 'test_queued_pool')
            .first()):
        pool = Pool(pool='test_queued_pool', slots=5)
        session.merge(pool)
        session.commit()
    session.close()

    dag_id = 'test_scheduled_queued_tasks'
    dag = self.dagbag.get_dag(dag_id)
    dag.clear()

    scheduler = SchedulerJob(dag_id, num_runs=10)
    scheduler.run()

    task_1 = dag.tasks[0]
    ti = TI(task_1, dag.start_date)
    ti.refresh_from_db()
    self.assertEqual(ti.state, State.FAILED)

    dag.clear()
def test_backfill_pooled_tasks(self):
    """
    Test that queued tasks are executed by BackfillJob

    Test for https://github.com/airbnb/airflow/pull/1225
    """
    session = settings.Session()
    pool = Pool(pool='test_backfill_pooled_task_pool', slots=1)
    session.add(pool)
    session.commit()

    dag = self.dagbag.get_dag('test_backfill_pooled_task_dag')
    dag.clear()

    job = BackfillJob(
        dag=dag,
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE)

    # run with timeout because this creates an infinite loop if not
    # caught
    with timeout(seconds=30):
        job.run()

    ti = TI(
        task=dag.get_task('test_backfill_pooled_task'),
        execution_date=DEFAULT_DATE)
    ti.refresh_from_db()
    self.assertEqual(ti.state, State.SUCCESS)
def test_odd_name(session, admin_client, pool):
    pool['pool'] = 'test-pool<script></script>'
    session.add(Pool(**pool))
    session.commit()
    resp = admin_client.get('/pool/list/')
    # The raw tag must appear HTML-escaped in the rendered list,
    # never echoed back verbatim.
    check_content_in_response('test-pool&lt;script&gt;', resp)
    check_content_not_in_response('test-pool<script>', resp)
def main(session=None):
    """Create clickstream DAG with branches for clickstream events grouped by type."""
    global default_args
    client = MongoClient()
    workflows = client.clickstream_configs()
    for workflow in workflows:
        default_args['app_id'] = workflow['_id']
        pool_name = "redshift_loader_{}_{}".format(workflow['_id'], 5)
        workflow['pool'] = pool_name

        # TODO: flip back to old schedule when done testing - 15 * * * *
        dag = DAG(dag_id=build_dag_id(workflow),
                  default_args=default_args,
                  schedule_interval='15 * * * *',
                  catchup=False)
        globals()[workflow['_id']] = dag

        start = DummyOperator(task_id='start', dag=dag,
                              resources=dict(organizationId='astronomer'))

        standard_events = StandardClickstreamEvents(
            workflow=workflow, dag=dag, upstream_task=start)
        standard_events.run()

        custom_events = CustomClickstreamEvents(
            workflow=workflow, dag=dag, upstream_task=start)
        custom_events.run()

        pool = Pool(pool=pool_name, slots=5)
        pool_query = session.query(Pool)
        pool_query = pool_query.filter(Pool.pool == pool_name)
        pool_query = pool_query.filter(Pool.slots == 5)
        pool_query_result = pool_query.limit(1).all()
        if len(pool_query_result) == 0:
            session.add(pool)
            session.commit()

    client.close()
    logger.info('Finished exporting clickstream DAGs.')
def test_backfill_pooled_tasks(self):
    """
    Test that queued tasks are executed by BackfillJob
    """
    session = settings.Session()
    pool = Pool(pool='test_backfill_pooled_task_pool', slots=1)
    session.add(pool)
    session.commit()
    session.close()

    dag = self.dagbag.get_dag('test_backfill_pooled_task_dag')
    dag.clear()

    executor = MockExecutor(do_update=True)
    job = BackfillJob(
        dag=dag,
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        executor=executor)

    # run with timeout because this creates an infinite loop if not
    # caught
    try:
        with timeout(seconds=5):
            job.run()
    except AirflowTaskTimeout:
        pass

    ti = TI(
        task=dag.get_task('test_backfill_pooled_task'),
        execution_date=DEFAULT_DATE)
    ti.refresh_from_db()
    self.assertEqual(ti.state, State.SUCCESS)
def test_backfill_respect_pool_limit(self, mock_log):
    session = settings.Session()

    slots = 2
    pool = Pool(
        pool='pool_with_two_slots',
        slots=slots,
    )
    session.add(pool)
    session.commit()

    dag = self._get_dummy_dag(
        dag_id='test_backfill_respect_pool_limit',
        pool=pool.pool,
    )

    executor = MockExecutor()

    job = BackfillJob(
        dag=dag,
        executor=executor,
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE + datetime.timedelta(days=7),
    )

    job.run()

    self.assertTrue(0 < len(executor.history))

    pool_was_full_at_least_once = False
    num_running_task_instances = 0

    for running_task_instances in executor.history:
        self.assertLessEqual(len(running_task_instances), slots)
        num_running_task_instances += len(running_task_instances)
        if len(running_task_instances) == slots:
            pool_was_full_at_least_once = True

    self.assertEqual(8, num_running_task_instances)
    self.assertTrue(pool_was_full_at_least_once)

    times_dag_concurrency_limit_reached_in_debug = self._times_called_with(
        mock_log.debug,
        DagConcurrencyLimitReached,
    )

    times_pool_limit_reached_in_debug = self._times_called_with(
        mock_log.debug,
        NoAvailablePoolSlot,
    )

    times_task_concurrency_limit_reached_in_debug = self._times_called_with(
        mock_log.debug,
        TaskConcurrencyLimitReached,
    )

    self.assertEqual(0, times_task_concurrency_limit_reached_in_debug)
    self.assertEqual(0, times_dag_concurrency_limit_reached_in_debug)
    self.assertGreater(times_pool_limit_reached_in_debug, 0)
def factory(**values):
    pool = Pool(**{**POOL, **values})  # Passed in values override defaults.
    session.add(pool)
    session.commit()
    return pool
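A possible way the fixture above might be used, assuming POOL is a dict of default Pool kwargs defined alongside it in the enclosing fixture (the overridden values here are illustrative):

# Hypothetical usage; assumes POOL supplies defaults such as pool/slots/description.
default_pool = factory()                    # all defaults taken from POOL
small_pool = factory(pool="tiny", slots=1)  # explicit kwargs win over POOL defaults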
def create_dbnd_pool():
    from airflow.utils.db import create_session
    from airflow.models import Pool

    print("Creating databand pool")
    with create_session() as session:
        pool_name = dbnd_config.get("airflow", "dbnd_pool")
        dbnd_pool = Pool(pool=pool_name, slots=-1)
        session.merge(dbnd_pool)
def setUp(self):
    super(TestPoolApiExperimental, self).setUp()
    app = application.create_app(testing=True)
    self.app = app.test_client()
    self.session = Session()
    clear_db_pools()
    self.pools = [Pool.get_default_pool()]
    for i in range(self.USER_POOL_COUNT):
        name = 'experimental_%s' % (i + 1)
        pool = Pool(
            pool=name,
            slots=i,
            description=name,
        )
        self.session.add(pool)
        self.pools.append(pool)
    self.session.commit()
    self.pool = self.pools[-1]
def test_scheduler_verify_pool_full(self, mock_pool_full):
    """
    Test task instances not queued when pool is full
    """
    mock_pool_full.return_value = False

    dag = DAG(
        dag_id='test_scheduler_verify_pool_full',
        start_date=DEFAULT_DATE)

    DummyOperator(
        task_id='dummy',
        dag=dag,
        owner='airflow',
        pool='test_scheduler_verify_pool_full')

    session = settings.Session()
    pool = Pool(pool='test_scheduler_verify_pool_full', slots=1)
    session.add(pool)
    orm_dag = DagModel(dag_id=dag.dag_id)
    orm_dag.is_paused = False
    session.merge(orm_dag)
    session.commit()

    scheduler = SchedulerJob()
    dag.clear()

    # Create 2 dagruns, which will create 2 task instances.
    dr = scheduler.create_dag_run(dag)
    self.assertIsNotNone(dr)
    self.assertEquals(dr.execution_date, DEFAULT_DATE)
    dr = scheduler.create_dag_run(dag)
    self.assertIsNotNone(dr)
    queue = []
    scheduler._process_task_instances(dag, queue=queue)
    self.assertEquals(len(queue), 2)
    dagbag = SimpleDagBag([dag])

    # Recreated part of the scheduler here, to kick off tasks -> executor
    for ti_key in queue:
        task = dag.get_task(ti_key[1])
        ti = models.TaskInstance(task, ti_key[2])
        # Task starts out in the scheduled state. All tasks in the
        # scheduled state will be sent to the executor
        ti.state = State.SCHEDULED

        # Also save this task instance to the DB.
        session.merge(ti)
        session.commit()

    scheduler._execute_task_instances(dagbag,
                                      (State.SCHEDULED, State.UP_FOR_RETRY))

    self.assertEquals(len(scheduler.executor.queued_tasks), 1)
def load_pools(
    config: dict,
    session: Session = None,
):
    pools: dict = config.get("pools", None)
    if pools is None:
        log.info("No pools found, skipping")
        return

    log.info("Loading pools from config...")
    for key in pools.keys():
        val = pools.get(key)
        pool = session.query(Pool).filter_by(pool=key).first()
        if pool is not None:
            log.info(f"Pool exists, skipping: {key}")
            continue
        log.info("Setting pool: " + key)
        pool = Pool(pool=key)
        if isinstance(val, dict):
            pool.description = val.get("description", "Loaded by zairflow")
            pool.slots = val.get("slots", -1)
        else:
            assert isinstance(val, (int, float))
            pool.description = "Loaded from zairflow init"
            pool.slots = val or -1
        session.add(pool)
        session.commit()
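The loader above accepts either a bare slot count or a per-pool mapping. A sketch of the "pools" section it expects, with made-up pool names (the "slots" and "description" keys come from the code itself):

# Hypothetical config; pool names and values are illustrative.
example_config = {
    "pools": {
        "etl": 10,            # bare number: slots only, default description
        "ml_training": {      # mapping form: explicit slots and description
            "slots": 2,
            "description": "GPU-bound training tasks",
        },
    },
}
# load_pools(example_config, session=session)  # session wiring assumed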
def create_airflow_pool(pool_name):
    from airflow.models import Pool
    from airflow.utils.db import create_session

    print("Creating Airflow pool '%s'" % pool_name)
    with create_session() as session:
        if session.query(Pool.pool).filter(Pool.pool == pool_name).scalar() is not None:
            return
        # -1 so we have endless pool
        dbnd_pool = Pool(pool=pool_name, slots=-1)
        session.merge(dbnd_pool)
        session.commit()
def create_pool(name, slots, description, session=None):
    """Create a pool with given parameters."""
    if not (name and name.strip()):
        raise PoolBadRequest("Pool name shouldn't be empty")
    try:
        slots = int(slots)
    except ValueError:
        raise PoolBadRequest("Bad value for `slots`: %s" % slots)

    session.expire_on_commit = False
    pool = session.query(Pool).filter_by(pool=name).first()
    if pool is None:
        pool = Pool(pool=name, slots=slots, description=description)
        session.add(pool)
    else:
        pool.slots = slots
        pool.description = description
    session.commit()
    return pool
def create_pool(name, slots, description, session=None):
    """Create a pool with given parameters."""
    if not (name and name.strip()):
        raise AirflowBadRequest("Pool name shouldn't be empty")
    try:
        slots = int(slots)
    except ValueError:
        raise AirflowBadRequest("Bad value for `slots`: %s" % slots)

    session.expire_on_commit = False
    pool = session.query(Pool).filter_by(pool=name).first()
    if pool is None:
        pool = Pool(pool=name, slots=slots, description=description)
        session.add(pool)
    else:
        pool.slots = slots
        pool.description = description
    session.commit()
    return pool
def setUp(self):
    super(TestPoolApiExperimental, self).setUp()
    self.pools = []
    for i in range(2):
        name = 'experimental_%s' % (i + 1)
        pool = Pool(
            pool=name,
            slots=i,
            description=name,
        )
        self.session.add(pool)
        self.pools.append(pool)
    self.session.commit()
    self.pool = self.pools[0]
def generate_init():
    """
    Check the table names in the Postgres `other` schema, combine them with
    standard params, and push the result as an Airflow Variable.
    Also create a pool.

    :return:
    """
    psql_hook = PostgresHook('airflow_docker_db')
    eng = psql_hook.get_sqlalchemy_engine()
    df = pd.read_sql(
        "select table_name from information_schema.tables where table_schema='other';",
        con=eng)
    table_list = df['table_name'].tolist()

    try:
        pool = Pool()
        pool.slots = 1
        pool.description = 'How many tasks can run at once'
        pool.pool = 'generate_tasks'
        session = Session()
        session.add(pool)
        session.commit()
    except Exception as ex:
        logging.info(f'Could not set pool. Details: {ex}')

    init_data = {
        'psql_conn_id': 'airflow_docker_db',
        'table_list': table_list,
        'pool': 'generate_tasks'
    }
    try:
        Variable.set(key='generate_tasks', value=init_data, serialize_json=True)
    except Exception as ex:
        logging.info(f'Could not set global variable. Details: {ex}')
def setUp(self):
    super(TestPoolApiExperimental, self).setUp()
    configuration.load_test_config()
    app = application.create_app(testing=True)
    self.app = app.test_client()
    self.session = Session()
    self.pools = []
    for i in range(2):
        name = 'experimental_%s' % (i + 1)
        pool = Pool(
            pool=name,
            slots=i,
            description=name,
        )
        self.session.add(pool)
        self.pools.append(pool)
    self.session.commit()
    self.pool = self.pools[0]
def test_list(app, session, admin_client, pool):
    pool['pool'] = 'test-pool'
    session.add(Pool(**pool))
    session.commit()

    resp = admin_client.get('/pool/list/')
    # We should see this link
    with app.test_request_context():
        url = flask.url_for('TaskInstanceModelView.list',
                            _flt_3_pool='test-pool', _flt_3_state='running')
        used_tag = flask.Markup("<a href='{url}'>{slots}</a>").format(url=url, slots=0)

        url = flask.url_for('TaskInstanceModelView.list',
                            _flt_3_pool='test-pool', _flt_3_state='queued')
        queued_tag = flask.Markup("<a href='{url}'>{slots}</a>").format(url=url, slots=0)
    check_content_in_response(used_tag, resp)
    check_content_in_response(queued_tag, resp)
def test_scheduler_pooled_tasks(self):
    """
    Test that the scheduler handles queued tasks correctly
    See issue #1299
    """
    session = settings.Session()
    if not (session.query(Pool)
            .filter(Pool.pool == 'test_queued_pool')
            .first()):
        pool = Pool(pool='test_queued_pool', slots=5)
        session.merge(pool)
        session.commit()
    session.close()

    dag_id = 'test_scheduled_queued_tasks'
    dag = self.dagbag.get_dag(dag_id)
    dag.clear()

    scheduler = SchedulerJob(dag_id,
                             num_runs=1,
                             executor=TestExecutor(),
                             **self.default_scheduler_args)
    scheduler.run()

    task_1 = dag.tasks[0]
    logging.info("Trying to find task {}".format(task_1))
    ti = TI(task_1, dag.start_date)
    ti.refresh_from_db()
    logging.error("TI is: {}".format(ti))
    self.assertEqual(ti.state, State.QUEUED)

    # now we use a DIFFERENT scheduler and executor
    # to simulate the num-runs CLI arg
    scheduler2 = SchedulerJob(
        dag_id,
        num_runs=5,
        executor=DEFAULT_EXECUTOR.__class__(),
        **self.default_scheduler_args)
    scheduler2.run()

    ti.refresh_from_db()
    self.assertEqual(ti.state, State.FAILED)

    dag.clear()
def create_hive_pool(session: Optional[Session] = None) -> None:
    pool = Pool(pool=pool_templates['hive_name'],
                slots=1,
                description=pool_templates['hive_description'])
    session.add(pool)
def set_default_pool_slots(slots):
    with create_session() as session:
        default_pool = Pool.get_default_pool(session)
        default_pool.slots = slots
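The helper above leans on create_session() committing when the context exits, so it needs no explicit commit. A hedged usage sketch (the slot count is arbitrary):

# Hypothetical usage, e.g. in a test; 128 is an arbitrary value.
set_default_pool_slots(128)
assert Pool.get_default_pool().slots == 128  # assumes provide_session supplies a session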
def setUp(self):
    db.clear_db_pools()
    with create_session() as session:
        test_pool = Pool(pool='test_pool', slots=1)
        session.add(test_pool)
        session.commit()